# Neural Network

Implement the back-propagation algorithm to learn the weights of a perceptron with 2 input nodes, 2 hidden nodes and 1 output node.

Note: **Python3** in used.

## OR 

In [12]:
# imports
import numpy as np

In [13]:
# parameters - input all parameter values here
input_dim = 2
hidden_dim = 2 # dimensions of hidden layers
std = 0.01  # train data noise standard deviation
w_std = 0.5
learn_rate = 0.01

In [14]:
# prepare training data
x_inputs = np.array([np.zeros(2), np.ones(2), np.array([1,0]), np.array([0,1])])
def generate_trainset(N):
    X = np.repeat(x_inputs, N//4, axis=0)
    y_or = np.logical_or(X.T[0], X.T[1]).astype(np.float)
    # add noise to data
    X += np.random.normal(0, std, X.shape)
    y_or += np.random.normal(0, std, N)
    # shuffle the training data
    indices = np.arange(N)
    np.random.shuffle(indices)
    x_train, y_train = X[indices], y_or[indices]
    return x_train, y_train

In [15]:
def sigmoid( t):
    return 1/(1  + np.exp(-t))

def dsigmoid( t):
    return sigmoid(t)*(1 - sigmoid(t))

######  Experiment with N = 1000

In [16]:
N = 1000
x_train, y_train = generate_trainset(N)

In [17]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 300 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N)   

Epoch:  1/300  Loss:  [0.19238834]
Epoch:  2/300  Loss:  [0.1904154]
Epoch:  3/300  Loss:  [0.18910905]
Epoch:  4/300  Loss:  [0.18831687]
Epoch:  5/300  Loss:  [0.18785601]
Epoch:  6/300  Loss:  [0.18757744]
Epoch:  7/300  Loss:  [0.18738627]
Epoch:  8/300  Loss:  [0.18723157]
Epoch:  9/300  Loss:  [0.18708879]
Epoch:  10/300  Loss:  [0.18694664]
Epoch:  11/300  Loss:  [0.18679986]
Epoch:  12/300  Loss:  [0.18664569]
Epoch:  13/300  Loss:  [0.18648229]
Epoch:  14/300  Loss:  [0.18630814]
Epoch:  15/300  Loss:  [0.18612162]
Epoch:  16/300  Loss:  [0.18592084]
Epoch:  17/300  Loss:  [0.1857035]
Epoch:  18/300  Loss:  [0.18546669]
Epoch:  19/300  Loss:  [0.18520676]
Epoch:  20/300  Loss:  [0.18491914]
Epoch:  21/300  Loss:  [0.18459805]
Epoch:  22/300  Loss:  [0.18423632]
Epoch:  23/300  Loss:  [0.18382498]
Epoch:  24/300  Loss:  [0.18335296]
Epoch:  25/300  Loss:  [0.18280665]
Epoch:  26/300  Loss:  [0.18216938]
Epoch:  27/300  Loss:  [0.18142094]
Epoch:  28/300  Loss:  [0.18053712]
Epo

Epoch:  228/300  Loss:  [0.00318218]
Epoch:  229/300  Loss:  [0.00316341]
Epoch:  230/300  Loss:  [0.00314487]
Epoch:  231/300  Loss:  [0.00312654]
Epoch:  232/300  Loss:  [0.00310843]
Epoch:  233/300  Loss:  [0.00309052]
Epoch:  234/300  Loss:  [0.00307282]
Epoch:  235/300  Loss:  [0.00305532]
Epoch:  236/300  Loss:  [0.00303802]
Epoch:  237/300  Loss:  [0.00302092]
Epoch:  238/300  Loss:  [0.003004]
Epoch:  239/300  Loss:  [0.00298728]
Epoch:  240/300  Loss:  [0.00297074]
Epoch:  241/300  Loss:  [0.00295438]
Epoch:  242/300  Loss:  [0.0029382]
Epoch:  243/300  Loss:  [0.0029222]
Epoch:  244/300  Loss:  [0.00290638]
Epoch:  245/300  Loss:  [0.00289072]
Epoch:  246/300  Loss:  [0.00287523]
Epoch:  247/300  Loss:  [0.00285991]
Epoch:  248/300  Loss:  [0.00284476]
Epoch:  249/300  Loss:  [0.00282976]
Epoch:  250/300  Loss:  [0.00281492]
Epoch:  251/300  Loss:  [0.00280024]
Epoch:  252/300  Loss:  [0.00278571]
Epoch:  253/300  Loss:  [0.00277133]
Epoch:  254/300  Loss:  [0.0027571]
Epoch:

In [18]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)

print(predict(x_inputs))
print(decision(x_inputs))

[[0.0690202 ]
 [0.97419909]
 [0.96353925]
 [0.96327857]]
[[0]
 [1]
 [1]
 [1]]


######  Experiment with N = 100
poor performance

In [19]:
N = 100
x_train, y_train = generate_trainset(N)
learn_rate = 0.01

In [20]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 250 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N) 

Epoch:  1/250  Loss:  [0.29375708]
Epoch:  2/250  Loss:  [0.27919393]
Epoch:  3/250  Loss:  [0.26619555]
Epoch:  4/250  Loss:  [0.25466529]
Epoch:  5/250  Loss:  [0.24448775]
Epoch:  6/250  Loss:  [0.23553979]
Epoch:  7/250  Loss:  [0.22769773]
Epoch:  8/250  Loss:  [0.22084165]
Epoch:  9/250  Loss:  [0.21485797]
Epoch:  10/250  Loss:  [0.20964097]
Epoch:  11/250  Loss:  [0.20509364]
Epoch:  12/250  Loss:  [0.20112811]
Epoch:  13/250  Loss:  [0.19766572]
Epoch:  14/250  Loss:  [0.19463677]
Epoch:  15/250  Loss:  [0.19198016]
Epoch:  16/250  Loss:  [0.18964271]
Epoch:  17/250  Loss:  [0.1875785]
Epoch:  18/250  Loss:  [0.18574805]
Epoch:  19/250  Loss:  [0.18411764]
Epoch:  20/250  Loss:  [0.18265848]
Epoch:  21/250  Loss:  [0.18134613]
Epoch:  22/250  Loss:  [0.18015982]
Epoch:  23/250  Loss:  [0.17908194]
Epoch:  24/250  Loss:  [0.17809755]
Epoch:  25/250  Loss:  [0.17719397]
Epoch:  26/250  Loss:  [0.17636045]
Epoch:  27/250  Loss:  [0.17558786]
Epoch:  28/250  Loss:  [0.17486842]
Ep

Epoch:  238/250  Loss:  [0.14766345]
Epoch:  239/250  Loss:  [0.14763402]
Epoch:  240/250  Loss:  [0.14760473]
Epoch:  241/250  Loss:  [0.14757555]
Epoch:  242/250  Loss:  [0.14754649]
Epoch:  243/250  Loss:  [0.14751755]
Epoch:  244/250  Loss:  [0.14748872]
Epoch:  245/250  Loss:  [0.14746]
Epoch:  246/250  Loss:  [0.14743137]
Epoch:  247/250  Loss:  [0.14740285]
Epoch:  248/250  Loss:  [0.14737442]
Epoch:  249/250  Loss:  [0.14734608]
Epoch:  250/250  Loss:  [0.14731783]


In [21]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)

print(predict(x_inputs))
print(decision(x_inputs))

[[0.6746005 ]
 [0.80680235]
 [0.77660176]
 [0.78777763]]
[[1]
 [1]
 [1]
 [1]]
