In [None]:
#@@@@@@@@@@@@@@@ Based on deeplearning course material by Andrew Ng @@@@@@@@@@@@@@@@@@@@@@@@@@
#A very simple implementation of Forward and Backward propagation with no deep learning library 
#In this code you can find implementations for 
#-------A sigle hidden layer Neural Network 
#-------Random Weight initialization 
#-------The idea of Vectorization to get rid of unnecessary for loops
#-------Activation functions [Relu, Leaky_Relu, tanh and sigmoid]
#-------Derivatives of activation functions [Relu, Leaky_Relu, tanh and sigmoid]
#-------Forward propagation 
#-------Backward propagation with gradient descent 
#-------Cost function 

In [1]:
#import libraries
import numpy as np
import math

In [2]:
#prepare the training sets [simple example]
X = [[1,1,1,1],[0,0,0,0],[1,1,0,0],[0,1,0,1]] #let equal number of a's and b's be class 0 and class 1 otherwise 
Y = [1,1,0,0]

In [3]:
#Setup the hidden layers 
a1 = np.zeros((4,4)) #activations at first hidden layer [four hidden units and four inputs(weights)]
a2 = np.zeros((1,4)) #activations at the output layer [one hidden unit and four inputs(weights)]
z1 = np.zeros((4,4)) #linear funtions at the first hidden layer [four hidden units and four inputs(weights)]
z2 = np.zeros((1,4)) #linear funtions at the output layer [one hidden unit and four inputs(weights)]

In [4]:
#initialze the parameters 
w1 = np.random.randn(4,4)*0.01
b1 = np.zeros((4,1))
w2 = np.random.randn(4,1)*0.01
b2 = 0 #single bias on the output node

In [5]:
#implement activation functions sigmoid, relu, tanh and leakyrelu
sigmoid = np.vectorize(lambda val: 1/(1 + math.exp(-val)))

tanh = np.vectorize(lambda val: (math.exp(val) - math.exp(-val))/(math.exp(val) + math.exp(-val)))

relu = np.vectorize(lambda val: max(0, val))

leaky_relu = np.vectorize(lambda val: max(0.0001*val, val))

In [6]:
#implement derivations of each activation functions
def der_sigmoid(z):
    return (1/(1+math.exp(-z)))*(1-(1+math.exp(-z))) 

def der_tanh(z):    
    return 1-(math.tanh(z)*math.tanh(z))

def der_relu(z):
    z[z>0]=1
    z[z<0]=0
    z[z==0]=0.00000000000001 
    return z

def der_leaky_relu(z): 
    z[z>0]=1
    z[z<0]=0.01 #can even be smaller than this number 
    z[z==0]=0.00000000000001 
    return z

In [7]:
iteration = 2000
learning_rate = 0.01

for i in range(0, iteration):
    #make prediction with forward prop for all the observations 
    z1 = np.transpose(w1).dot(X) + b1 
    a1 = relu(z1)
    z2 = np.transpose(w2).dot(a1) + b2 
    a2 = sigmoid(z2) #last layer with sigmoid activation function
    
    #now calculate the cost function j_wb 
    sum = 0;
    for i in range(0, 4):
        sum += (-(Y[i]*math.log(a2[0][1])+ (1-Y[i])*math.log(1-a2[0][1]) ))
        j_wb = sum/4
    print("Training Loss:", str(j_wb))
    
    #Gradient descent with back propagation    
    dz2 = a2 - Y #derivated through the chain rule of derivatives [derivative of sigmoid activation function * derivative of logistic cost function]
    dw2 = (1/4)*np.dot(dz2,np.transpose(a1))
    db2 = (1/4)*np.sum(dz2, axis=1, keepdims = True)
    w2 = w2 - np.transpose(learning_rate*(dw2)) #update the weights 
    b2 = b2 - np.transpose(learning_rate*(db2)) #update the bias 

    dz1 = w2.dot(dz2)*der_relu(a1) #w2 must have been transpossed in this function but it is already transposed at the forward pass phase above 
    dw1 = (1/4)*(dz1.dot(np.transpose(X)))
    db1 = (1/4)*(np.sum(dz1, axis=1, keepdims = True))
    w1 = w1 - np.transpose(learning_rate*(dw1)) #update the weights 
    b1 = b1 - np.transpose(learning_rate*(db1)) #update the bias 

Training Loss: 0.693147186754307
Training Loss: 0.6931471866582404
Training Loss: 0.6931471865633378
Training Loss: 0.6931471864695873
Training Loss: 0.693147186376977
Training Loss: 0.6931471862854954
Training Loss: 0.6931471861951313
Training Loss: 0.6931471861058727
Training Loss: 0.6931471860177087
Training Loss: 0.6931471859306281
Training Loss: 0.6931471858446197
Training Loss: 0.6931471857596723
Training Loss: 0.6931471856757758
Training Loss: 0.6931471855929189
Training Loss: 0.693147185511091
Training Loss: 0.6931471854302819
Training Loss: 0.6931471853504809
Training Loss: 0.6931471852716777
Training Loss: 0.6931471851938622
Training Loss: 0.6931471851170243
Training Loss: 0.6931471850411541
Training Loss: 0.6931471849662414
Training Loss: 0.6931471848922766
Training Loss: 0.6931471848192499
Training Loss: 0.6931471847471516
Training Loss: 0.6931471846759725
Training Loss: 0.693147184605703
Training Loss: 0.6931471845363337
Training Loss: 0.6931471844678558
Training Loss: 0.6

Training Loss: 0.6931471823274986
Training Loss: 0.6931471823504186
Training Loss: 0.6931471823734905
Training Loss: 0.6931471823967149
Training Loss: 0.6931471824200921
Training Loss: 0.693147182443623
Training Loss: 0.6931471824673079
Training Loss: 0.6931471824911475
Training Loss: 0.6931471825151425
Training Loss: 0.6931471825392934
Training Loss: 0.6931471825636009
Training Loss: 0.6931471825880654
Training Loss: 0.6931471826126878
Training Loss: 0.6931471826374688
Training Loss: 0.6931471826624089
Training Loss: 0.6931471826875087
Training Loss: 0.6931471827127693
Training Loss: 0.693147182738191
Training Loss: 0.6931471827637747
Training Loss: 0.6931471827895211
Training Loss: 0.693147182815431
Training Loss: 0.6931471828415052
Training Loss: 0.6931471828677441
Training Loss: 0.693147182894149
Training Loss: 0.6931471829207204
Training Loss: 0.6931471829474591
Training Loss: 0.6931471829743661
Training Loss: 0.693147183001442
Training Loss: 0.6931471830286876
Training Loss: 0.69

Training Loss: 0.6931472239122476
Training Loss: 0.6931472242435625
Training Loss: 0.6931472245775363
Training Loss: 0.6931472249141918
Training Loss: 0.6931472252535518
Training Loss: 0.6931472255956397
Training Loss: 0.6931472259404787
Training Loss: 0.6931472262880924
Training Loss: 0.6931472266385047
Training Loss: 0.6931472269917395
Training Loss: 0.6931472273478209
Training Loss: 0.6931472277067734
Training Loss: 0.6931472280686217
Training Loss: 0.6931472284333907
Training Loss: 0.6931472288011054
Training Loss: 0.6931472291717911
Training Loss: 0.6931472295454733
Training Loss: 0.6931472299221777
Training Loss: 0.6931472303019305
Training Loss: 0.6931472306847578
Training Loss: 0.6931472310706861
Training Loss: 0.6931472314597419
Training Loss: 0.6931472318519523
Training Loss: 0.6931472322473442
Training Loss: 0.6931472326459454
Training Loss: 0.6931472330477834
Training Loss: 0.6931472334528859
Training Loss: 0.6931472338612812
Training Loss: 0.6931472342729976
Training Loss:

Training Loss: 0.6931485471776744
Training Loss: 0.6931485593545529
Training Loss: 0.6931485716430572
Training Loss: 0.6931485840442309
Training Loss: 0.6931485965591275
Training Loss: 0.6931486091888107
Training Loss: 0.6931486219343536
Training Loss: 0.6931486347968403
Training Loss: 0.6931486477773646
Training Loss: 0.693148660877031
Training Loss: 0.6931486740969541
Training Loss: 0.6931486874382595
Training Loss: 0.6931487009020834
Training Loss: 0.693148714489572
Training Loss: 0.6931487282018838
Training Loss: 0.693148742040187
Training Loss: 0.6931487560056614
Training Loss: 0.6931487700994982
Training Loss: 0.6931487843228994
Training Loss: 0.6931487986770786
Training Loss: 0.6931488131632613
Training Loss: 0.6931488277826839
Training Loss: 0.693148842536595
Training Loss: 0.6931488574262552
Training Loss: 0.6931488724529367
Training Loss: 0.6931488876179237
Training Loss: 0.693148902922513
Training Loss: 0.6931489183680133
Training Loss: 0.693148933955746
Training Loss: 0.693

Training Loss: 0.6932619156513152
Training Loss: 0.6932629996686105
Training Loss: 0.6932640939099552
Training Loss: 0.693265198470824
Training Loss: 0.6932663134475636
Training Loss: 0.6932674389374018
Training Loss: 0.6932685750384546
Training Loss: 0.6932697218497346
Training Loss: 0.6932708794711582
Training Loss: 0.6932720480035532
Training Loss: 0.6932732275486685
Training Loss: 0.6932744182091803
Training Loss: 0.6932756200887016
Training Loss: 0.6932768332917889
Training Loss: 0.6932780579239523
Training Loss: 0.6932792940916623
Training Loss: 0.6932805419023591
Training Loss: 0.6932818014644607
Training Loss: 0.6932830728873713
Training Loss: 0.6932843562814905
Training Loss: 0.6932856517582211
Training Loss: 0.6932869594299786
Training Loss: 0.6932882794101998
Training Loss: 0.6932896118133516
Training Loss: 0.6932909567549398
Training Loss: 0.6932923143515185
Training Loss: 0.6932936847206987
Training Loss: 0.6932950679811585
Training Loss: 0.6932964642526508
Training Loss: 

In [28]:
#make a prediction[single forward pass with the last weights]
X_test = [1,1,1,0]
z1 = np.transpose(w1[0]).dot(X_test) + b1[0] 
a1 = relu(z1)
z2 = np.transpose(w2).dot(a1) + b2 
a2 = sigmoid(z2) 
print("Predicted class: ", str(round(a2[0][0], 0))) #bad result is expected as the model is trained only with 4 observations 

Predicted class:  0.0


In [8]:
#Further extension
#------Use big training data X
#------Accept parameters from users input_shape, hidden layers, hidden units and others