Building blocks of computation that can be reused to construct deeper, more complex
deep learning (DL) models.

In [2]:
import numpy as np
from typing import Callable, List, Tuple, Dict

In [3]:
class Operation(object):
    '''
    Base class for a single operation in a neural network.

    Subclasses implement ``_output`` (the forward computation, which reads
    ``self.input_``) and ``_input_grad`` (the gradient passed back to the
    input). ``forward`` and ``backward`` take care of caching the
    intermediate values and of checking that shapes line up.
    '''

    def __init__(self) -> None:
        pass

    def forward(self, input_: np.ndarray) -> np.ndarray:
        '''
        Run the forward computation.

        Stores ``input_`` in ``self.input_``, stores the result of
        ``self._output()`` in ``self.output`` and returns that result.
        '''
        self.input_ = input_
        self.output = self._output()
        return self.output

    def backward(self, output_grad: np.ndarray) -> np.ndarray:
        '''
        Run the backward computation.

        Calls ``self._input_grad(output_grad)``, stores the result in
        ``self.input_grad`` and returns it. ``forward`` must have been
        called first, because ``self.output`` and ``self.input_`` are used
        for the shape checks.

        Raises:
            AssertionError: if ``output_grad`` does not have the shape of
                ``self.output``, or if the computed input gradient does not
                have the shape of ``self.input_``.
        '''
        # Shape mismatches are the most common wiring mistake, so report
        # both shapes instead of failing with a bare AssertionError.
        assert self.output.shape == output_grad.shape, (
            f"output_grad shape {output_grad.shape} does not match "
            f"output shape {self.output.shape}"
        )
        self.input_grad = self._input_grad(output_grad)
        assert self.input_grad.shape == self.input_.shape, (
            f"input_grad shape {self.input_grad.shape} does not match "
            f"input shape {self.input_.shape}"
        )
        return self.input_grad

    def _output(self) -> np.ndarray:
        '''
        Compute the output from ``self.input_``.

        Must be defined by each concrete operation.
        '''
        raise NotImplementedError()

    def _input_grad(self, output_grad: np.ndarray) -> np.ndarray:
        '''
        Compute the gradient with respect to the input from ``output_grad``.

        Must be defined by each concrete operation.
        '''
        raise NotImplementedError()



A class for operations that also involve parameters, for example matrix multiplication.

In [8]:
class ParamOperation(Operation):
    '''
    An Operation that additionally holds a parameter array.

    On top of the ``_output`` / ``_input_grad`` hooks inherited from
    ``Operation``, subclasses implement ``_param_grad``, the gradient with
    respect to ``self.param``.
    '''

    def __init__(self, param: np.ndarray) -> None:
        '''
        Store ``param`` in ``self.param``.
        '''
        super().__init__()
        self.param = param

    def backward(self, output_grad: np.ndarray) -> np.ndarray:
        '''
        Compute and store both the input gradient and the parameter gradient.

        Sets ``self.input_grad`` and ``self.param_grad`` and returns
        ``self.input_grad``.

        Raises:
            AssertionError: if ``output_grad`` does not have the shape of
                ``self.output``, if the input gradient does not have the
                shape of ``self.input_``, or if the parameter gradient does
                not have the shape of ``self.param``.
        '''
        assert self.output.shape == output_grad.shape, (
            f"output_grad shape {output_grad.shape} does not match "
            f"output shape {self.output.shape}"
        )

        self.input_grad = self._input_grad(output_grad)
        self.param_grad = self._param_grad(output_grad)

        # Report both shapes so a mis-shaped gradient is easy to track down.
        assert self.input_grad.shape == self.input_.shape, (
            f"input_grad shape {self.input_grad.shape} does not match "
            f"input shape {self.input_.shape}"
        )
        assert self.param_grad.shape == self.param.shape, (
            f"param_grad shape {self.param_grad.shape} does not match "
            f"param shape {self.param.shape}"
        )

        return self.input_grad

    def _param_grad(self, output_grad: np.ndarray) -> np.ndarray:
        '''
        Compute the gradient with respect to ``self.param``.

        Must be defined by each concrete parameterised operation.
        '''
        raise NotImplementedError()
  

Deep learning model: building blocks on top of our base class \
For the neural network in the previous chapter, we need three different operations per layer:
1. Matrix multiplication
2. Addition of a bias term
3. Non-linear activation function

In [9]:
class weightMultiply(ParamOperation):
    '''
    Matrix multiplication of the input by a weight matrix.

    The weight matrix ``W`` is kept as the operation's parameter
    (``self.param``).
    '''

    def __init__(self, W : np.ndarray):
        super().__init__(W)

    def _output(self) -> np.ndarray :
        '''
        Return ``dot(input_, W)``.
        '''
        weights = self.param
        return np.dot(self.input_, weights)

    def _input_grad(self, output_grad : np.ndarray) -> np.ndarray:
        '''
        Return ``dot(output_grad, W transposed)``.
        '''
        weights_t = np.transpose(self.param, (1, 0))
        return np.dot(output_grad, weights_t)

    def _param_grad(self, output_grad : np.ndarray) -> np.ndarray:
        '''
        Return ``dot(input_ transposed, output_grad)``.
        '''
        input_t = np.transpose(self.input_, (1, 0))
        return np.dot(input_t, output_grad)
    

In [10]:
# Addition of bias term
class BiasAdd(ParamOperation):
    '''
    Adds a bias term to the input.

    The bias ``B`` is kept as the operation's parameter and must have
    exactly one row (``B.shape[0] == 1``).
    '''

    def __init__(self, B : np.ndarray):
        assert B.shape[0] == 1
        super().__init__(B)

    def _output(self):
        '''
        Return ``input_ + B``.
        '''
        bias = self.param
        return self.input_ + bias

    def _input_grad(self, output_grad : np.ndarray) -> np.ndarray :
        '''
        Return ``output_grad`` multiplied element-wise by ones shaped like
        the input.
        '''
        ones = np.ones_like(self.input_)
        return ones * output_grad

    def _param_grad(self, output_grad : np.ndarray) -> np.ndarray :
        '''
        Return ``output_grad`` summed over axis 0, reshaped to a single row
        with as many columns as the bias.
        '''
        per_row = np.ones_like(self.param) * output_grad
        summed = np.sum(per_row, axis=0)
        n_cols = self.param.shape[1]
        return summed.reshape(1, n_cols)

In [7]:
# sigmoid activation layer

class Sigmoid(Operation):
    '''
    Element-wise sigmoid activation.
    '''

    def __init__(self) -> None:
        super().__init__()

    def _output(self) -> np.ndarray:
        '''
        Return ``1 / (1 + exp(-input_))``.
        '''
        exp_neg = np.exp(-1.0 * self.input_)
        return 1.0 / (1.0 + exp_neg)

    def _input_grad(self, output_grad : np.ndarray) -> np.ndarray:
        '''
        Return ``output_grad`` scaled by the sigmoid derivative.
        '''
        # d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)); the forward
        # result cached in self.output is reused here.
        local_slope = self.output * (1 - self.output)
        return local_slope * output_grad
    
    


    

    
    
        





The operations above together constitute a layer, so we can now write a Layer class.

In [11]:
# abstract layer class
class Layer(object):
    '''
    Abstract layer: an ordered list of operations applied one after another.

    Subclasses implement ``_setup_layer``, which is called once, on the
    first ``forward`` call, with that call's input.
    '''

    def __init__(self, neurons : int) :
        self.first = True  # True until the first successful forward pass
        self.neurons = neurons
        self.params : List[np.ndarray] = []
        self.param_grads : List[np.ndarray] = []
        self.operations : List[Operation] = []

    def _setup_layer(self, input_ : np.ndarray) -> None:
        '''
        Set the layer up for the given input; provided by derived classes.
        '''
        raise NotImplementedError()

    def forward(self, input_ : np.ndarray) -> np.ndarray :
        '''
        Pass ``input_`` through every operation in order.

        Stores the original input in ``self.input_`` and the final result in
        ``self.output``, and returns the latter.
        '''
        if self.first:
            # Lazy setup: only runs on the very first call.
            self._setup_layer(input_)
            self.first = False

        self.input_ = input_
        current = input_
        for op in self.operations:
            current = op.forward(current)

        self.output = current
        return self.output

    def backward(self, output_grad : np.ndarray) -> np.ndarray :
        '''
        Pass ``output_grad`` backward through the operations in reverse order.

        Refreshes ``self.param_grads`` afterwards and returns the gradient
        produced by the first operation's ``backward``.
        '''
        assert(self.output.shape == output_grad.shape)

        grad = output_grad
        for op in reversed(self.operations):
            grad = op.backward(grad)

        self._param_grads()
        return grad

    def _param_grads(self):
        '''
        Rebuild ``self.param_grads`` from every ParamOperation's ``param_grad``.
        '''
        self.param_grads = []
        self.param_grads.extend(
            op.param_grad
            for op in self.operations
            if isinstance(op, ParamOperation)
        )

    def _params(self):
        '''
        Rebuild ``self.params`` from every ParamOperation's ``param``.
        '''
        self.params = []
        self.params.extend(
            op.param
            for op in self.operations
            if isinstance(op, ParamOperation)
        )

        
        
        
    


We now have the base class Layer, and layers with different functionality can be subclassed from it. One example is the dense layer, which is essentially a matrix multiplication connecting the input neurons to the output neurons. Let us start writing that class.

In [None]:
class Dense(Layer):
    def __init__(self, neurons : int, activation : Operation = Sigmoid()) -> None:
        super().__init__(neurons)
        self.activation = activation
    