# Stochastic Gradient Descent in Neural Network
## Working with 2 dimensions of data: (x, y)

In [1]:
# Import dependencies; only numpy for now
import numpy as np

<img src="sigmoidfunct.jpg">

In [2]:
# Sigmoid activation function.
# When deriv is False (feedforward), it returns the sigmoid of x.
# When deriv is True (backpropagation), it returns the derivative of the sigmoid, where x is already a sigmoid output.
def nonlin(x, deriv=False):
    """Sigmoid activation, or its derivative written in terms of the sigmoid output.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        With ``deriv`` false: the pre-activation value(s).
        With ``deriv`` true: value(s) that are ALREADY sigmoid outputs
        (a previous ``nonlin(z)`` result), not raw pre-activations.
    deriv : bool, default False
        False -> feed-forward: return ``1 / (1 + exp(-x))``.
        True  -> backpropagation: return ``x * (1 - x)``.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise result with the same shape as ``x``.
    """
    if deriv:
        # sigma'(z) = sigma(z) * (1 - sigma(z)); the caller passes sigma(z) as x.
        return x * (1 - x)

    # Numerically stable sigmoid: only non-positive numbers are exponentiated,
    # so np.exp cannot overflow (the naive 1/(1+exp(-x)) overflows for x << 0).
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()]  # turn a 0-d result back into a NumPy scalar; arrays pass through

In [3]:
# Input dataset: one sample per row, three features each (third column is always 1)
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

# Target dataset: a (4, 1) column holding one expected output per row of X
y = np.array([[0], [1], [1], [0]])

# Seed NumPy's global random generator so every run draws the same numbers
np.random.seed(1)

## Simple neural network
### No hidden layer (a single layer of weights)

In [4]:
# Random weight initialization: a 3x1 matrix of values drawn uniformly from [-1, 1)
weight_1 = np.random.random(size=(3, 1)) * 2 - 1

In [5]:
# Number of gradient-descent steps to run
n_iterations = 100000

# Neural network: the inputs feed the single output unit directly through weight_1.
# NOTE: y is the XOR of the first two columns of X. A network without a hidden
# layer cannot represent XOR, so the outputs below settle at 0.5 for every sample.
for _ in range(n_iterations):

    # Forward propagation
    layer_0 = X
    layer_1 = nonlin(np.dot(layer_0, weight_1))

    # Error calculation for layer_1 (prediction minus target)
    layer_1_err = layer_1 - y

    # Backpropagation: multiply the error by the derivative of the activation
    # function. nonlin(..., deriv=True) expects the sigmoid OUTPUT, hence layer_1.
    layer_1_der = layer_1_err * nonlin(layer_1, deriv=True)
    weight_slope = np.dot(layer_0.T, layer_1_der)

    # Update the weights in place (no learning rate here, i.e. a step size of 1)
    weight_1 -= weight_slope

# layer_1 is the output of the last forward pass, computed before the final update
print("Weight_1 after {} times is \n {}".format(n_iterations, weight_1))
print("Desired output is \n {}".format(y))
print("Calcualted outputs after {} times is \n {}".format(n_iterations, layer_1))

Weight_1 after 100000 times is 
 [[  2.08166817e-16]
 [  2.22044605e-16]
 [ -3.05311332e-16]]
Desired output is 
 [[0]
 [1]
 [1]
 [0]]
Calcualted outputs after 100000 times is 
 [[ 0.5]
 [ 0.5]
 [ 0.5]
 [ 0.5]]


## Neural network with SGD and alpha
### with 1 hidden layer (2 layers of weights)
1. Forward propagation for all layers
2. Backpropagation by
    (1) Calculating the error of the rightmost (output) layer
    (2) Calculating the delta of that layer (error * derivative of the activation function)
    (3) Repeating step (1) for the second-rightmost layer
    (4) Repeating step (2) for the second-rightmost layer
    (5) Following the same sequence for all remaining layers
    
#### Forward propagation is easy because it needs no error calculation, whereas backpropagation has to be done step by step for each layer.

In [6]:
# Candidate values of alpha, from smallest to largest, to compare one by one
alphas = [
    0.001, 0.01, 0.1, 0.5,
    1, 10, 100,
]

<img src="2layers.jpg">

In [7]:
# Training schedule shared by every alpha (defined once instead of per alpha)
n_iterations = 60000
report_every = 10000

# Train a fresh two-weight-layer network once per alpha
for alpha in alphas:
    print("\n Training with alpha {}".format(alpha))
    # Re-seed so that every alpha starts from the same initial weights
    np.random.seed(1)

    # Random weight initialization in [-1, 1): 3 inputs -> 4 hidden units -> 1 output
    weight_1 = 2*np.random.random((3,4)) - 1
    weight_2 = 2*np.random.random((4,1)) - 1

    # Neural network with different alpha effect
    for j in range(n_iterations):

        ## Forward propagation step ##
        layer_0 = X
        layer_1 = nonlin(np.dot(layer_0, weight_1))
        layer_2 = nonlin(np.dot(layer_1, weight_2))

        ## Backpropagation step ##
        # Step (1): error of the output layer
        layer_2_err = layer_2 - y

        # Report the mean absolute error at iterations 0, 10000, ..., 50000
        if (j % report_every) == 0:
            print("Error after {} iterations {}".format(j, np.mean(np.abs(layer_2_err))))

        # Step (2): output delta = error * sigmoid derivative (taken from the output)
        layer_2_der = layer_2_err * nonlin(layer_2, deriv=True)

        # Step (3): send the delta back through weight_2. This must use weight_2
        # BEFORE it is updated below, so the statement order matters.
        layer_1_err = layer_2_der.dot(weight_2.T)

        # Step (4): hidden-layer delta
        layer_1_der = layer_1_err * nonlin(layer_1, deriv=True)

        # Update the weights, scaling each gradient by alpha
        weight_2 -= alpha * (layer_1.T.dot(layer_2_der))
        weight_1 -= alpha * (layer_0.T.dot(layer_1_der))


 Training with alpha 0.001
Error after 0 iterations 0.49641003190272537
Error after 10000 iterations 0.49516402549338606
Error after 20000 iterations 0.4935960431880486
Error after 30000 iterations 0.4916063585594306
Error after 40000 iterations 0.48910016654420474
Error after 50000 iterations 0.48597785784615843

 Training with alpha 0.01
Error after 0 iterations 0.49641003190272537
Error after 10000 iterations 0.45743107444190134
Error after 20000 iterations 0.359097202563399
Error after 30000 iterations 0.23935813715897253
Error after 40000 iterations 0.1430706590133703
Error after 50000 iterations 0.09859642980892719

 Training with alpha 0.1
Error after 0 iterations 0.49641003190272537
Error after 10000 iterations 0.042888017000115755
Error after 20000 iterations 0.02409899422852161
Error after 30000 iterations 0.018110652146797843
Error after 40000 iterations 0.014987616272210912
Error after 50000 iterations 0.013014490538142586

 Training with alpha 0.5
Error after 0 iterations