# No hidden layers

In [2]:
import numpy as np
import math

# sigmoid function
def sigmoid(x, deriv=False):
    """Logistic sigmoid, or its slope expressed through the sigmoid's output.

    Parameters
    ----------
    x : scalar or numpy array
        With ``deriv`` left False this is the pre-activation value. With
        ``deriv=True`` the formula ``x * (1 - x)`` is applied directly, which
        equals the sigmoid's derivative only when ``x`` is already a sigmoid
        output s = sigmoid(a), since ds/da = s * (1 - s).
    deriv : bool, optional
        Select the derivative formula instead of the sigmoid itself.

    Returns
    -------
    Same shape as ``x``: ``1 / (1 + exp(-x))``, or ``x * (1 - x)`` when
    ``deriv`` compares equal to True.
    """
    # Equality (not truthiness) is kept on purpose: only values that compare
    # equal to True select the derivative branch.
    if not (deriv == True):
        return 1 / (1 + np.exp(-x))
    return x * (1 - x)

# input dataset: one observation per row. The third column is always 1,
# so its weight acts as a bias term in A = X . W
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1],
              [1, 0, 1]])

# output dataset: one target per observation, as an n x 1 column
y = np.array([[0, 0, 1, 1, 1]]).T

# seed random numbers to make the calculation
# deterministic (just a good practice)
np.random.seed(1)

# initialize weights randomly with mean 0 (3 features -> 1 output)
W = np.random.randn(3, 1)

# `range` and single-argument `print(...)` behave the same on Python 2 and
# Python 3; the loop variable is named `step` so the builtin `iter` is not
# shadowed for the rest of the notebook.
for step in range(1000):

    # forward propagation
    A = np.dot(X, W)    # n x 1 pre-activations
    H = sigmoid(A)      # n x 1 predictions

    # how much did we miss? (per-observation squared error; the update below
    # uses its derivative, L itself is only kept for inspection)
    L = 0.5 * (y - H) ** 2

    # dL/dH for L = 0.5 * (y - H)^2
    dLdH = -1.0 * (y - H)

    # dH/dA: slope of the sigmoid, computed from the sigmoid output H
    dHdA = sigmoid(H, True)

    # chain rule: multiply how much we missed by the
    # slope of the sigmoid at the values in H
    dLdA = dLdH * dHdA

    # xij = observation i, feature j
    # A1 = w1 * x11 + w2 * x12 + w3 * x13
    # A2 = w1 * x21 + w2 * x22 + w3 * x23
    # A3 = w1 * x31 + w2 * x32 + w3 * x33
    # so dAi / dwj = xij, and X.T lines the features up with the weights
    dAdW = X.T

    # 3 x 1 = (3 x n) . (n x 1)
    # Each weight's gradient is the sum over all n observations of
    # (that weight's feature value) * (dL/dA for that observation):
    #   dL/dw1 = dLdA_1 * x11 + dLdA_2 * x21 + ... + dLdA_n * xn1
    #   dL/dw2 = dLdA_1 * x12 + dLdA_2 * x22 + ... + dLdA_n * xn2
    #   dL/dw3 = dLdA_1 * x13 + dLdA_2 * x23 + ... + dLdA_n * xn3
    dLdW = np.dot(dAdW, dLdA)

    # update weights: plain gradient descent with an implicit step size of 1
    W -= dLdW

# Note: H comes from the last forward pass, i.e. it was computed with the
# weights as they were just before the final update of W.
print("Output After Training:")
print(W)
print(H)

Output After Training:
[[ 7.46187116]
 [-0.37718946]
 [-3.36186709]]
[[ 0.03352639]
 [ 0.02323634]
 [ 0.9836892 ]
 [ 0.97639195]
 [ 0.9836892 ]]


# One hidden layer

In [1]:
import numpy as np

def nonlin(x, deriv=False):
    """Sigmoid non-linearity, or its slope given an already-activated value.

    Parameters
    ----------
    x : scalar or numpy array
        Pre-activation input when ``deriv`` is left False. With ``deriv=True``
        the value ``x * (1 - x)`` is returned as-is, which is the sigmoid's
        derivative only if ``x`` is itself a sigmoid output.
    deriv : bool, optional
        When it compares equal to True, return ``x * (1 - x)`` instead of the
        sigmoid.

    Returns
    -------
    ``1 / (1 + exp(-x))`` by default, ``x * (1 - x)`` in derivative mode.
    """
    if deriv == True:
        slope = x * (1 - x)
        return slope
    denominator = 1 + np.exp(-x)
    return 1 / denominator
    
# input dataset: one observation per row, three features each
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1],
              [1, 1, 0],
              [0, 0, 0],
              [0, 1, 0]])

# target for each observation, as an n x 1 column
y = np.array([[0],
              [0],
              [1],
              [1],
              [1],
              [1],
              [1]])

np.random.seed(1)

# randomly initialize our weights with mean 0
V = np.random.randn(3, 4)   # input (3 features) -> hidden (4 units)
W = np.random.randn(4, 1)   # hidden (4 units)   -> output (1 unit)

# `range` and single-argument `print(...)` behave the same on Python 2 and
# Python 3, so this cell runs on either interpreter.
for j in range(60000):

    # Feed forward through layers 0, 1, and 2
    A1 = np.dot(X, V)    # n x 4 hidden pre-activations
    A2 = nonlin(A1)      # n x 4 hidden activations
    B1 = np.dot(A2, W)   # n x 1 output pre-activations
    B2 = nonlin(B1)      # n x 1 predictions

    # how much did we miss the target value? (per-observation squared error)
    L = 0.5 * (y - B2) ** 2

    # report the mean loss every 10000 iterations; L is a square and thus
    # already non-negative, so no absolute value is needed
    if (j % 10000) == 0:
        print("Error:" + str(np.mean(L)))

    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    dLdB2 = -1.0 * (y - B2)

    # slope of the output sigmoid, computed from its output B2
    dB2dB1 = nonlin(B2, deriv=True)

    dLdB1 = dLdB2 * dB2dB1

    # B1 = A2 . W, so the gradient w.r.t. W goes through A2.T
    dB1dW = A2.T

    dLdW = dB1dW.dot(dLdB1)

    # ... and the gradient w.r.t. A2 goes through W.T (W as it is *before*
    # this iteration's update, which is why both updates happen at the end)
    dB1dA2 = W.T

    dLdA2 = dLdB1.dot(dB1dA2)

    # slope of the hidden sigmoid, computed from its output A2
    dA2dA1 = nonlin(A2, deriv=True)

    dLdA1 = dLdA2 * dA2dA1

    # A1 = X . V, so the gradient w.r.t. V goes through X.T
    dA1dV = X.T

    dLdV = dA1dV.dot(dLdA1)

    # dLdW (4 x 1) = A2.T (4 x n) . dLdB1 (n x 1)
    W -= dLdW

    # dLdV (3 x 4) = X.T (3 x n) . dLdA1 (n x 4)
    V -= dLdV

Error:0.126916753852
Error:1.92407935252e-05
Error:9.46304363495e-06
Error:6.26225354584e-06
Error:4.67565755863e-06
Error:3.72887254869e-06
