In [0]:
import numpy as np

In [0]:
#Activation Functions


# With der=True each function returns its derivative instead of its value; in that
# case X must be the cached OUTPUT of the activation (not the pre-activation input).
def activation_sigmoid(X,der=False):
    """Logistic sigmoid, or its derivative expressed through the cached output.

    Parameters
    ----------
    X : scalar or array-like
        Pre-activation values when ``der`` is False. When ``der`` is True,
        ``X`` must already be the sigmoid output (the cached activation).
    der : bool, optional
        False (default) evaluates the sigmoid; True evaluates its derivative.

    Returns
    -------
    Element-wise ``1 / (1 + exp(-X))`` when ``der`` is False, otherwise
    ``X * (1 - X)``.
    """
    if der:
        # d(sigmoid)/dz = a * (1 - a), where a is the cached sigmoid output.
        return np.multiply(X,(1-X))

    # Only ever exponentiate a non-positive number so np.exp cannot overflow:
    #   z >= 0 :  1   / (1 + e^-z)
    #   z <  0 :  e^z / (1 + e^z)
    # Both are the same function; with decay = e^-|z| they share one formula.
    z = np.asarray(X,dtype=float)
    decay = np.exp(-np.abs(z))
    return np.where(z >= 0,1.0,decay) / (1.0 + decay)

def activation_linear(X,der=False):
    """Identity activation.

    With ``der`` False the input is handed back untouched; with ``der`` True
    an array of ones with the same shape as ``X`` is returned (the slope of
    the identity is 1 everywhere).
    """
    if der:
        return np.ones(X.shape)
    return X


def activation_relu(X,der=False):
    """Rectified linear unit, or its derivative expressed through the cached output.

    Parameters
    ----------
    X : array-like
        Pre-activation values when ``der`` is False. When ``der`` is True,
        ``X`` must already be the ReLU output (the cached activation).
    der : bool, optional
        False (default) evaluates ``max(X, 0)``; True evaluates the derivative.

    Returns
    -------
    Element-wise ``max(X, 0)`` when ``der`` is False, otherwise a float array
    holding 1.0 where the unit was active and 0.0 elsewhere.
    """
    if der:
        # X is the cached ReLU output, which is never negative. Testing
        # `X >= 0` would therefore be true everywhere and let gradients flow
        # through units that were switched off; an active unit is exactly one
        # whose output is strictly positive.
        return (X>0).astype(float)

    # np.maximum also maps -inf to 0, whereas `(X>=0)*X` yields 0 * -inf = nan.
    return np.maximum(X,0)

In [0]:
#Cost Functions

def crossEntropyCost(Y,Y_orig,der=False,eps=1e-12):
    """Binary cross-entropy cost, or its derivative w.r.t. the predictions.

    Parameters
    ----------
    Y : ndarray
        Predicted probabilities. The cost is averaged over ``Y.shape[1]``.
    Y_orig : ndarray
        Target labels, broadcast-compatible with ``Y``.
    der : bool, optional
        False (default) returns the scalar cost; True returns the element-wise
        derivative of the (un-averaged) loss with respect to ``Y``.
    eps : float, optional
        Predictions are clipped to ``[eps, 1 - eps]`` before use so that a
        saturated output of exactly 0 or 1 cannot produce ``log(0)`` or a
        division by zero.

    Returns
    -------
    A 0-d cost value when ``der`` is False, otherwise an array shaped like ``Y``.
    """
    # Work on a clipped copy; the caller's array is left untouched.
    Y = np.clip(Y,eps,1-eps)

    if der:
        # d/dY of -(y*log(Y) + (1-y)*log(1-Y))
        return -(np.divide(Y_orig, Y) - np.divide(1 - Y_orig, 1 - Y))

    m=Y.shape[1]
    cost = -np.sum(Y_orig*np.log(Y)+(1-Y_orig)*np.log(1-Y))/m
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    return cost


In [0]:
class NeuralNetwork():
    """Small fully-connected feed-forward network trained by batch gradient descent.

    Typical use: call ``addLayer`` once for the input size (no activation) and
    once per subsequent layer (with an activation), then ``compile`` to
    allocate parameters, then ``fit``.

    Data layout: inputs are ``(features, samples)`` and every layer output is
    ``(layer_size, samples)``; weight matrix ``i`` has shape
    ``(layerSizes[i+1], layerSizes[i])`` and bias ``i`` has shape
    ``(layerSizes[i+1], 1)``.

    Activation callables must accept ``(X, der=False)``; with ``der=True``
    they are passed the cached layer OUTPUT and must return the derivative.
    """

    def __init__(self):
        self.layerSizes = []
        self.activations = []
        # Default so debugPrint() is safe to call before compile() has run.
        self.debug = False

    def debugPrint(self,x):
        """Print ``x`` only when debug output is enabled."""
        if self.debug:
            print(x)

    def compile(self,lr=0.9,debug=False,seed=None):
        """Allocate and randomly initialise weights and biases.

        Parameters
        ----------
        lr : float
            Learning rate used by ``apply_grads``.
        debug : bool
            Enables the verbose output emitted through ``debugPrint``.
        seed : int or None
            When given, parameters are drawn from a private
            ``np.random.RandomState(seed)`` so initialisation is reproducible.
            ``None`` (default) draws from NumPy's global generator.
        """
        self.debug=debug
        self.lr = lr

        # The np.random module and a RandomState instance both expose randn().
        rng = np.random if seed is None else np.random.RandomState(seed)

        # One weight matrix / bias vector per connection between layers.
        self.totalLayers = len(self.layerSizes)-1
        self.weights = [None]*self.totalLayers
        self.bias = [None]*self.totalLayers

        for i in range(self.totalLayers):
            fan_in,fan_out = self.layerSizes[i],self.layerSizes[i+1]

            # Weights: (layer i+1, layer i); bias: (layer i+1, 1).
            # Small random values break the symmetry between units.
            self.weights[i]=rng.randn(fan_out,fan_in)*0.01
            self.bias[i]=rng.randn(fan_out,1)*0.01

            assert(self.weights[i].shape==(fan_out,fan_in))
            assert(self.bias[i].shape==(fan_out,1))

        print("Model Compiled Successfully")

    def addLayer(self,layerSize,activation=None):
        """Append a layer of ``layerSize`` units.

        The first call defines the input layer and must not pass an
        activation; every later call must pass one.
        """
        self.layerSizes.append(layerSize)

        if len(self.layerSizes)==1: # No Activation on Input Layer
            assert activation is None, "the input layer takes no activation"
        else:
            assert activation is not None, "an activation is required for non-input layers"
            self.activations.append(activation)

    def forward(self,X):
        """Run a forward pass and cache every layer output.

        ``self.activationCache[0]`` is ``X`` and ``self.activationCache[i+1]``
        is the output of layer ``i``; ``self.last_Output`` is the final
        output. ``compute_grads`` relies on both.
        """
        A = X
        self.activationCache = [X] # Used to Compute Gradients

        self.debugPrint("=================Model Meta=====================")
        self.debugPrint(f"Layers(Including Input) :{self.layerSizes}")
        self.debugPrint(f"Activations:{self.activations}")

        # Forward propagate through each layer
        for i in range(self.totalLayers):
            Z = np.dot(self.weights[i],A)+self.bias[i] # Z = W.A + b
            A = self.activations[i](Z)                  # A = activation(Z)
            self.activationCache.append(A)

            self.debugPrint("======================================================")
            self.debugPrint(f"Layer #{i+1} Output:\n{A}")
        self.debugPrint("================== End Of Model =================================")
        self.last_Output = A
        return A

    def fit(self,X,Y,epochs=1):
        """Train with full-batch gradient descent, printing the cost each epoch.

        ``X`` is ``(features, samples)`` and ``Y`` is ``(outputs, samples)``.
        """
        result = self.forward(X)
        cost = crossEntropyCost(result,Y)
        print(f"Initial Cost:{cost}")

        for i in range(epochs):
            # The cache left by the most recent forward pass already matches
            # the current parameters, so no extra pass is needed here.
            dWs,dbs = self.compute_grads(Y)
            self.apply_grads(dWs,dbs)

            # Refresh the cache with the updated parameters and report the cost.
            result = self.forward(X)
            cost = crossEntropyCost(result,Y)
            print(f"Epoch ({i}/{epochs-1})=> Cost:{cost}")

    def apply_grads(self,dWs,dbs):
        """Take one gradient-descent step, updating parameters in place."""
        for i in range(self.totalLayers):
            self.weights[i] -= self.lr * dWs[i]
            self.bias[i] -= self.lr * dbs[i]

    def compute_grads(self,Y):
        """Back-propagate the cross-entropy cost through the cached forward pass.

        Must be called after ``forward``; it reads ``self.last_Output`` and
        ``self.activationCache``. Returns ``(dWs, dbs)``: per-layer gradients
        of the batch-averaged cost, shaped like ``self.weights`` / ``self.bias``.
        """

        def linear_backward(dZ,cached_activation,weights,bias):
            # Gradients of one affine layer Z = W.A_prev + b.
            # cached_activation is A_prev (the layer's input); its second
            # axis is the batch dimension.
            m=cached_activation.shape[1]
            dW = np.dot(dZ,cached_activation.T)/m
            # Averaged over the batch exactly like dW, so both gradients
            # belong to the same (mean) cost.
            db = np.sum(dZ,axis=1,keepdims=True)/m
            dA = np.dot(weights.T,dZ)

            assert (dW.shape == weights.shape)
            assert (db.shape == bias.shape)
            assert (dA.shape == cached_activation.shape)

            return (dW,db,dA)

        dWs = [None]*self.totalLayers
        dbs = [None]*self.totalLayers

        # Gradient of the loss w.r.t. the current layer's output; starts at
        # the network output and is pushed back one layer per iteration.
        dA = crossEntropyCost(self.last_Output,Y,der=True)

        for l in reversed(range(self.totalLayers)):
            # Activation derivative evaluated on this layer's cached output.
            dadz = self.activations[l](self.activationCache[l+1],der=True)
            dldz = np.multiply(dA,dadz)
            dWs[l],dbs[l],dA = linear_backward(dldz,self.activationCache[l],self.weights[l],self.bias[l])

        return dWs,dbs




# Seed NumPy's global generator so the random weight initialisation, and
# therefore the whole training run, is reproducible on Restart & Run All.
np.random.seed(42)

# 2 inputs -> 6 sigmoid hidden units -> 1 sigmoid output
nn = NeuralNetwork()
nn.addLayer(2)
nn.addLayer(6,activation_sigmoid)
nn.addLayer(1,activation_sigmoid)
nn.compile(lr=2.15)

# One sample per row; the target is 1 when both inputs are equal.
X = np.array([[0,0],[0,1],[1,0],[1,1]])
Y = np.array([[1],[0],[0],[1]])
print(f"Input:{X}")
print(f"Target:{Y}")

# The network expects (features, samples), hence the transposes.
nn.fit(X.T,Y.T,2000)


array([[0.00022479]])

In [210]:
# Count down from 4 to 0, printing one value per line.
for i in range(4, -1, -1):
    print(i)

4
3
2
1
0


In [179]:
# Build the integers 1..7, then mark entries greater than 3 with 1.0 and the rest with 0.0.
a = np.arange(1, 8)
np.greater(a, 3).astype(float)


array([0., 0., 0., 1., 1., 1., 1.])