# Neural Network

Implement the back-propagation algorithm to learn the weights of a perceptron with 2 input nodes, 2 hidden nodes and 1 output node.

Note: **Python3** in used.

## OR 

In [15]:
# imports
import numpy as np

In [28]:
# parameters - input all parameter values here
input_dim = 2
hidden_dim = 2 # dimensions of hidden layers
std = 0.01  # train data noise standard deviation
w_std = 0.5
learn_rate = 0.01

In [29]:
# prepare training data
x_inputs = np.array([np.zeros(2), np.ones(2), np.array([1,0]), np.array([0,1])])
def generate_trainset(N):
    X = np.repeat(x_inputs, N//4, axis=0)
    y_xor = np.logical_xor(X.T[0], X.T[1]).astype(np.float)
    # add noise to data
    X += np.random.normal(0, std, X.shape)
    y_xor += np.random.normal(0, std, N)
    # shuffle the training data
    indices = np.arange(N)
    np.random.shuffle(indices)
    x_train, y_train = X[indices], y_xor[indices]
    return x_train, y_train

In [30]:
def sigmoid( t):
    return 1/(1  + np.exp(-t))

def dsigmoid( t):
    return sigmoid(t)*(1 - sigmoid(t))

######  Experiment with N = 1000

In [31]:
N = 1000
x_train, y_train = generate_trainset(N)

In [32]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 300 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N)   

Epoch:  1/300  Loss:  [0.24970636]
Epoch:  2/300  Loss:  [0.2496569]
Epoch:  3/300  Loss:  [0.24964112]
Epoch:  4/300  Loss:  [0.24963403]
Epoch:  5/300  Loss:  [0.24962876]
Epoch:  6/300  Loss:  [0.24962351]
Epoch:  7/300  Loss:  [0.2496179]
Epoch:  8/300  Loss:  [0.24961187]
Epoch:  9/300  Loss:  [0.24960545]
Epoch:  10/300  Loss:  [0.24959863]
Epoch:  11/300  Loss:  [0.24959138]
Epoch:  12/300  Loss:  [0.2495837]
Epoch:  13/300  Loss:  [0.24957552]
Epoch:  14/300  Loss:  [0.24956681]
Epoch:  15/300  Loss:  [0.24955751]
Epoch:  16/300  Loss:  [0.24954754]
Epoch:  17/300  Loss:  [0.24953684]
Epoch:  18/300  Loss:  [0.24952533]
Epoch:  19/300  Loss:  [0.24951291]
Epoch:  20/300  Loss:  [0.2494995]
Epoch:  21/300  Loss:  [0.24948497]
Epoch:  22/300  Loss:  [0.24946922]
Epoch:  23/300  Loss:  [0.24945209]
Epoch:  24/300  Loss:  [0.24943344]
Epoch:  25/300  Loss:  [0.2494131]
Epoch:  26/300  Loss:  [0.24939087]
Epoch:  27/300  Loss:  [0.24936654]
Epoch:  28/300  Loss:  [0.24933985]
Epoch:

Epoch:  227/300  Loss:  [0.10567449]
Epoch:  228/300  Loss:  [0.10550082]
Epoch:  229/300  Loss:  [0.10532872]
Epoch:  230/300  Loss:  [0.10515816]
Epoch:  231/300  Loss:  [0.10498912]
Epoch:  232/300  Loss:  [0.10482158]
Epoch:  233/300  Loss:  [0.10465551]
Epoch:  234/300  Loss:  [0.10449089]
Epoch:  235/300  Loss:  [0.1043277]
Epoch:  236/300  Loss:  [0.10416592]
Epoch:  237/300  Loss:  [0.10400553]
Epoch:  238/300  Loss:  [0.10384652]
Epoch:  239/300  Loss:  [0.10368886]
Epoch:  240/300  Loss:  [0.10353253]
Epoch:  241/300  Loss:  [0.10337752]
Epoch:  242/300  Loss:  [0.1032238]
Epoch:  243/300  Loss:  [0.10307136]
Epoch:  244/300  Loss:  [0.10292018]
Epoch:  245/300  Loss:  [0.10277025]
Epoch:  246/300  Loss:  [0.10262155]
Epoch:  247/300  Loss:  [0.10247406]
Epoch:  248/300  Loss:  [0.10232776]
Epoch:  249/300  Loss:  [0.10218264]
Epoch:  250/300  Loss:  [0.10203868]
Epoch:  251/300  Loss:  [0.10189587]
Epoch:  252/300  Loss:  [0.1017542]
Epoch:  253/300  Loss:  [0.10161364]
Epoc

In [33]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)

print(predict(x_inputs))
print(decision(x_inputs))

[[0.09480666]
 [0.5129879 ]
 [0.76515515]
 [0.76614306]]
[[0]
 [1]
 [1]
 [1]]


######  Experiment with N = 100
poor performance

In [39]:
N = 100
x_train, y_train = generate_trainset(N)
learn_rate = 0.01

In [40]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 250 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_xor[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N) 

Epoch:  1/250  Loss:  [0.11016344]
Epoch:  2/250  Loss:  [0.09819333]
Epoch:  3/250  Loss:  [0.08815396]
Epoch:  4/250  Loss:  [0.0796583]
Epoch:  5/250  Loss:  [0.07240785]
Epoch:  6/250  Loss:  [0.06617087]
Epoch:  7/250  Loss:  [0.06076607]
Epoch:  8/250  Loss:  [0.05605038]
Epoch:  9/250  Loss:  [0.05190999]
Epoch:  10/250  Loss:  [0.04825349]
Epoch:  11/250  Loss:  [0.04500694]
Epoch:  12/250  Loss:  [0.04211002]
Epoch:  13/250  Loss:  [0.03951312]
Epoch:  14/250  Loss:  [0.03717519]
Epoch:  15/250  Loss:  [0.03506202]
Epoch:  16/250  Loss:  [0.0331449]
Epoch:  17/250  Loss:  [0.0313996]
Epoch:  18/250  Loss:  [0.02980557]
Epoch:  19/250  Loss:  [0.02834527]
Epoch:  20/250  Loss:  [0.02700366]
Epoch:  21/250  Loss:  [0.02576778]
Epoch:  22/250  Loss:  [0.02462641]
Epoch:  23/250  Loss:  [0.02356981]
Epoch:  24/250  Loss:  [0.02258948]
Epoch:  25/250  Loss:  [0.02167797]
Epoch:  26/250  Loss:  [0.02082874]
Epoch:  27/250  Loss:  [0.020036]
Epoch:  28/250  Loss:  [0.01929464]
Epoch:

In [41]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)

print(predict(x_inputs))
print(decision(x_inputs))

[[0.0446088 ]
 [0.03235253]
 [0.0445691 ]
 [0.0323346 ]]
[[0]
 [0]
 [0]
 [0]]
