# Neural Network

Implement the back-propagation algorithm to learn the weights of a perceptron with 2 input nodes, 2 hidden nodes and 1 output node.

## XOR

Note: **Python3** in used.

In [1]:
# imports
import numpy as np

In [2]:
# parameters - input all parameter values here
# note: Unless a cell is found with changed values, the training cells the parameters here 
input_dim = 2
hidden_dim = 2 # dimensions of hidden layers
std = 0.005  # train data noise standard deviation
w_std = 0.5
learn_rate = 0.005

In [3]:
# prepare training data
x_inputs = np.array([np.zeros(2), np.ones(2), np.array([1,0]), np.array([0,1])])
def generate_trainset(N):
    X = np.repeat(x_inputs, N//4, axis=0)
    y_xor = np.logical_xor(X.T[0], X.T[1]).astype(np.float)
    # add noise to data
    X += np.random.normal(0, std, X.shape)
    y_xor += np.random.normal(0, std, N)
    # shuffle the training data
    indices = np.arange(N)
    np.random.shuffle(indices)
    x_train, y_train = X[indices], y_xor[indices]
    return x_train, y_train

In [4]:
def sigmoid( t):
    return 1/(1  + np.exp(-t))

def dsigmoid( t):
    return sigmoid(t)*(1 - sigmoid(t))

######  Experiment with N = 1000
This gives good performance. 

In [5]:
N = 1000
x_train, y_train = generate_trainset(N)

In [6]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))# [1,1], [1,0], [0,0], [0,1]
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 500 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N)   

Epoch:  1/500  Loss:  [0.25778441]
Epoch:  2/500  Loss:  [0.25423825]
Epoch:  3/500  Loss:  [0.25228139]
Epoch:  4/500  Loss:  [0.25120912]
Epoch:  5/500  Loss:  [0.25062088]
Epoch:  6/500  Loss:  [0.25029642]
Epoch:  7/500  Loss:  [0.25011601]
Epoch:  8/500  Loss:  [0.25001467]
Epoch:  9/500  Loss:  [0.24995702]
Epoch:  10/500  Loss:  [0.24992369]
Epoch:  11/500  Loss:  [0.249904]
Epoch:  12/500  Loss:  [0.24989204]
Epoch:  13/500  Loss:  [0.2498845]
Epoch:  14/500  Loss:  [0.24987954]
Epoch:  15/500  Loss:  [0.24987609]
Epoch:  16/500  Loss:  [0.24987355]
Epoch:  17/500  Loss:  [0.24987156]
Epoch:  18/500  Loss:  [0.24986992]
Epoch:  19/500  Loss:  [0.2498685]
Epoch:  20/500  Loss:  [0.24986722]
Epoch:  21/500  Loss:  [0.24986603]
Epoch:  22/500  Loss:  [0.24986491]
Epoch:  23/500  Loss:  [0.24986383]
Epoch:  24/500  Loss:  [0.24986278]
Epoch:  25/500  Loss:  [0.24986174]
Epoch:  26/500  Loss:  [0.24986072]
Epoch:  27/500  Loss:  [0.2498597]
Epoch:  28/500  Loss:  [0.24985867]
Epoch:

Epoch:  228/500  Loss:  [0.18849467]
Epoch:  229/500  Loss:  [0.18720038]
Epoch:  230/500  Loss:  [0.18592477]
Epoch:  231/500  Loss:  [0.18466807]
Epoch:  232/500  Loss:  [0.18343044]
Epoch:  233/500  Loss:  [0.18221197]
Epoch:  234/500  Loss:  [0.18101271]
Epoch:  235/500  Loss:  [0.17983266]
Epoch:  236/500  Loss:  [0.17867176]
Epoch:  237/500  Loss:  [0.17752993]
Epoch:  238/500  Loss:  [0.17640704]
Epoch:  239/500  Loss:  [0.17530293]
Epoch:  240/500  Loss:  [0.17421742]
Epoch:  241/500  Loss:  [0.17315031]
Epoch:  242/500  Loss:  [0.17210135]
Epoch:  243/500  Loss:  [0.17107032]
Epoch:  244/500  Loss:  [0.17005695]
Epoch:  245/500  Loss:  [0.16906096]
Epoch:  246/500  Loss:  [0.16808208]
Epoch:  247/500  Loss:  [0.16712002]
Epoch:  248/500  Loss:  [0.16617447]
Epoch:  249/500  Loss:  [0.16524513]
Epoch:  250/500  Loss:  [0.16433171]
Epoch:  251/500  Loss:  [0.16343389]
Epoch:  252/500  Loss:  [0.16255137]
Epoch:  253/500  Loss:  [0.16168384]
Epoch:  254/500  Loss:  [0.16083098]
E

Epoch:  462/500  Loss:  [0.09551158]
Epoch:  463/500  Loss:  [0.09537582]
Epoch:  464/500  Loss:  [0.09524063]
Epoch:  465/500  Loss:  [0.09510601]
Epoch:  466/500  Loss:  [0.09497196]
Epoch:  467/500  Loss:  [0.09483847]
Epoch:  468/500  Loss:  [0.09470554]
Epoch:  469/500  Loss:  [0.09457315]
Epoch:  470/500  Loss:  [0.09444132]
Epoch:  471/500  Loss:  [0.09431003]
Epoch:  472/500  Loss:  [0.09417927]
Epoch:  473/500  Loss:  [0.09404905]
Epoch:  474/500  Loss:  [0.09391936]
Epoch:  475/500  Loss:  [0.09379019]
Epoch:  476/500  Loss:  [0.09366155]
Epoch:  477/500  Loss:  [0.09353342]
Epoch:  478/500  Loss:  [0.0934058]
Epoch:  479/500  Loss:  [0.09327868]
Epoch:  480/500  Loss:  [0.09315207]
Epoch:  481/500  Loss:  [0.09302596]
Epoch:  482/500  Loss:  [0.09290035]
Epoch:  483/500  Loss:  [0.09277522]
Epoch:  484/500  Loss:  [0.09265059]
Epoch:  485/500  Loss:  [0.09252643]
Epoch:  486/500  Loss:  [0.09240276]
Epoch:  487/500  Loss:  [0.09227956]
Epoch:  488/500  Loss:  [0.09215683]
Ep

In [7]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)
print(predict(x_inputs))
print(decision(x_inputs))

[[0.13525868]
 [0.48221731]
 [0.76386501]
 [0.76426623]]
[[0]
 [0]
 [1]
 [1]]


###### Experiment with N = 100

In [8]:
N = 100
x_train, y_train = generate_trainset(N)

In [9]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 1000 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N)   

Epoch:  1/1000  Loss:  [0.24921357]
Epoch:  2/1000  Loss:  [0.24921568]
Epoch:  3/1000  Loss:  [0.24921784]
Epoch:  4/1000  Loss:  [0.24922006]
Epoch:  5/1000  Loss:  [0.24922232]
Epoch:  6/1000  Loss:  [0.24922462]
Epoch:  7/1000  Loss:  [0.24922695]
Epoch:  8/1000  Loss:  [0.24922931]
Epoch:  9/1000  Loss:  [0.2492317]
Epoch:  10/1000  Loss:  [0.24923411]
Epoch:  11/1000  Loss:  [0.24923654]
Epoch:  12/1000  Loss:  [0.24923898]
Epoch:  13/1000  Loss:  [0.24924144]
Epoch:  14/1000  Loss:  [0.2492439]
Epoch:  15/1000  Loss:  [0.24924637]
Epoch:  16/1000  Loss:  [0.24924884]
Epoch:  17/1000  Loss:  [0.24925132]
Epoch:  18/1000  Loss:  [0.24925379]
Epoch:  19/1000  Loss:  [0.24925626]
Epoch:  20/1000  Loss:  [0.24925872]
Epoch:  21/1000  Loss:  [0.24926118]
Epoch:  22/1000  Loss:  [0.24926363]
Epoch:  23/1000  Loss:  [0.24926607]
Epoch:  24/1000  Loss:  [0.2492685]
Epoch:  25/1000  Loss:  [0.24927091]
Epoch:  26/1000  Loss:  [0.24927332]
Epoch:  27/1000  Loss:  [0.24927571]
Epoch:  28/10

Epoch:  230/1000  Loss:  [0.24958419]
Epoch:  231/1000  Loss:  [0.24958568]
Epoch:  232/1000  Loss:  [0.24958717]
Epoch:  233/1000  Loss:  [0.24958867]
Epoch:  234/1000  Loss:  [0.24959017]
Epoch:  235/1000  Loss:  [0.24959167]
Epoch:  236/1000  Loss:  [0.24959317]
Epoch:  237/1000  Loss:  [0.24959467]
Epoch:  238/1000  Loss:  [0.24959618]
Epoch:  239/1000  Loss:  [0.24959769]
Epoch:  240/1000  Loss:  [0.2495992]
Epoch:  241/1000  Loss:  [0.24960071]
Epoch:  242/1000  Loss:  [0.24960222]
Epoch:  243/1000  Loss:  [0.24960374]
Epoch:  244/1000  Loss:  [0.24960526]
Epoch:  245/1000  Loss:  [0.24960678]
Epoch:  246/1000  Loss:  [0.2496083]
Epoch:  247/1000  Loss:  [0.24960983]
Epoch:  248/1000  Loss:  [0.24961135]
Epoch:  249/1000  Loss:  [0.24961288]
Epoch:  250/1000  Loss:  [0.24961441]
Epoch:  251/1000  Loss:  [0.24961594]
Epoch:  252/1000  Loss:  [0.24961748]
Epoch:  253/1000  Loss:  [0.24961901]
Epoch:  254/1000  Loss:  [0.24962055]
Epoch:  255/1000  Loss:  [0.24962209]
Epoch:  256/10

Epoch:  479/1000  Loss:  [0.24997513]
Epoch:  480/1000  Loss:  [0.24997655]
Epoch:  481/1000  Loss:  [0.24997798]
Epoch:  482/1000  Loss:  [0.2499794]
Epoch:  483/1000  Loss:  [0.24998081]
Epoch:  484/1000  Loss:  [0.24998223]
Epoch:  485/1000  Loss:  [0.24998364]
Epoch:  486/1000  Loss:  [0.24998505]
Epoch:  487/1000  Loss:  [0.24998645]
Epoch:  488/1000  Loss:  [0.24998785]
Epoch:  489/1000  Loss:  [0.24998925]
Epoch:  490/1000  Loss:  [0.24999065]
Epoch:  491/1000  Loss:  [0.24999204]
Epoch:  492/1000  Loss:  [0.24999343]
Epoch:  493/1000  Loss:  [0.24999482]
Epoch:  494/1000  Loss:  [0.2499962]
Epoch:  495/1000  Loss:  [0.24999758]
Epoch:  496/1000  Loss:  [0.24999895]
Epoch:  497/1000  Loss:  [0.25000033]
Epoch:  498/1000  Loss:  [0.2500017]
Epoch:  499/1000  Loss:  [0.25000306]
Epoch:  500/1000  Loss:  [0.25000443]
Epoch:  501/1000  Loss:  [0.25000579]
Epoch:  502/1000  Loss:  [0.25000714]
Epoch:  503/1000  Loss:  [0.2500085]
Epoch:  504/1000  Loss:  [0.25000985]
Epoch:  505/1000

Epoch:  728/1000  Loss:  [0.25020322]
Epoch:  729/1000  Loss:  [0.25020355]
Epoch:  730/1000  Loss:  [0.25020388]
Epoch:  731/1000  Loss:  [0.2502042]
Epoch:  732/1000  Loss:  [0.25020452]
Epoch:  733/1000  Loss:  [0.25020484]
Epoch:  734/1000  Loss:  [0.25020515]
Epoch:  735/1000  Loss:  [0.25020546]
Epoch:  736/1000  Loss:  [0.25020576]
Epoch:  737/1000  Loss:  [0.25020606]
Epoch:  738/1000  Loss:  [0.25020635]
Epoch:  739/1000  Loss:  [0.25020664]
Epoch:  740/1000  Loss:  [0.25020692]
Epoch:  741/1000  Loss:  [0.2502072]
Epoch:  742/1000  Loss:  [0.25020748]
Epoch:  743/1000  Loss:  [0.25020775]
Epoch:  744/1000  Loss:  [0.25020801]
Epoch:  745/1000  Loss:  [0.25020828]
Epoch:  746/1000  Loss:  [0.25020853]
Epoch:  747/1000  Loss:  [0.25020879]
Epoch:  748/1000  Loss:  [0.25020904]
Epoch:  749/1000  Loss:  [0.25020928]
Epoch:  750/1000  Loss:  [0.25020952]
Epoch:  751/1000  Loss:  [0.25020976]
Epoch:  752/1000  Loss:  [0.25020999]
Epoch:  753/1000  Loss:  [0.25021022]
Epoch:  754/10

Epoch:  950/1000  Loss:  [0.25018158]
Epoch:  951/1000  Loss:  [0.25018105]
Epoch:  952/1000  Loss:  [0.25018051]
Epoch:  953/1000  Loss:  [0.25017997]
Epoch:  954/1000  Loss:  [0.25017943]
Epoch:  955/1000  Loss:  [0.25017888]
Epoch:  956/1000  Loss:  [0.25017832]
Epoch:  957/1000  Loss:  [0.25017776]
Epoch:  958/1000  Loss:  [0.25017719]
Epoch:  959/1000  Loss:  [0.25017662]
Epoch:  960/1000  Loss:  [0.25017604]
Epoch:  961/1000  Loss:  [0.25017545]
Epoch:  962/1000  Loss:  [0.25017486]
Epoch:  963/1000  Loss:  [0.25017427]
Epoch:  964/1000  Loss:  [0.25017367]
Epoch:  965/1000  Loss:  [0.25017306]
Epoch:  966/1000  Loss:  [0.25017245]
Epoch:  967/1000  Loss:  [0.25017183]
Epoch:  968/1000  Loss:  [0.2501712]
Epoch:  969/1000  Loss:  [0.25017057]
Epoch:  970/1000  Loss:  [0.25016994]
Epoch:  971/1000  Loss:  [0.25016929]
Epoch:  972/1000  Loss:  [0.25016865]
Epoch:  973/1000  Loss:  [0.25016799]
Epoch:  974/1000  Loss:  [0.25016733]
Epoch:  975/1000  Loss:  [0.25016666]
Epoch:  976/1

In [10]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)
print(predict(x_inputs))
print(decision(x_inputs))

[[0.4689421 ]
 [0.49667954]
 [0.51204509]
 [0.45334759]]
[[0]
 [0]
 [1]
 [0]]


###### experiment with 6 hidden nodes and N = 1000
This gives good performance. 

In [11]:
N = 1000
x_train, y_train = generate_trainset(N)
hidden_dim = 6

In [12]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 300 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N)   

Epoch:  1/300  Loss:  [0.25274302]
Epoch:  2/300  Loss:  [0.2499302]
Epoch:  3/300  Loss:  [0.24960193]
Epoch:  4/300  Loss:  [0.24949844]
Epoch:  5/300  Loss:  [0.24940887]
Epoch:  6/300  Loss:  [0.24931951]
Epoch:  7/300  Loss:  [0.24922614]
Epoch:  8/300  Loss:  [0.24912852]
Epoch:  9/300  Loss:  [0.24902584]
Epoch:  10/300  Loss:  [0.24891744]
Epoch:  11/300  Loss:  [0.24880256]
Epoch:  12/300  Loss:  [0.24868039]
Epoch:  13/300  Loss:  [0.24855007]
Epoch:  14/300  Loss:  [0.24841063]
Epoch:  15/300  Loss:  [0.24826103]
Epoch:  16/300  Loss:  [0.24810012]
Epoch:  17/300  Loss:  [0.24792667]
Epoch:  18/300  Loss:  [0.24773931]
Epoch:  19/300  Loss:  [0.24753655]
Epoch:  20/300  Loss:  [0.24731676]
Epoch:  21/300  Loss:  [0.24707817]
Epoch:  22/300  Loss:  [0.24681884]
Epoch:  23/300  Loss:  [0.2465367]
Epoch:  24/300  Loss:  [0.24622946]
Epoch:  25/300  Loss:  [0.2458947]
Epoch:  26/300  Loss:  [0.2455298]
Epoch:  27/300  Loss:  [0.245132]
Epoch:  28/300  Loss:  [0.24469837]
Epoch: 

Epoch:  229/300  Loss:  [0.02170234]
Epoch:  230/300  Loss:  [0.02137808]
Epoch:  231/300  Loss:  [0.02106191]
Epoch:  232/300  Loss:  [0.02075359]
Epoch:  233/300  Loss:  [0.02045286]
Epoch:  234/300  Loss:  [0.02015948]
Epoch:  235/300  Loss:  [0.01987323]
Epoch:  236/300  Loss:  [0.01959389]
Epoch:  237/300  Loss:  [0.01932123]
Epoch:  238/300  Loss:  [0.01905505]
Epoch:  239/300  Loss:  [0.01879515]
Epoch:  240/300  Loss:  [0.01854133]
Epoch:  241/300  Loss:  [0.01829342]
Epoch:  242/300  Loss:  [0.01805122]
Epoch:  243/300  Loss:  [0.01781457]
Epoch:  244/300  Loss:  [0.01758329]
Epoch:  245/300  Loss:  [0.01735723]
Epoch:  246/300  Loss:  [0.01713623]
Epoch:  247/300  Loss:  [0.01692013]
Epoch:  248/300  Loss:  [0.01670879]
Epoch:  249/300  Loss:  [0.01650208]
Epoch:  250/300  Loss:  [0.01629985]
Epoch:  251/300  Loss:  [0.01610197]
Epoch:  252/300  Loss:  [0.01590832]
Epoch:  253/300  Loss:  [0.01571877]
Epoch:  254/300  Loss:  [0.01553321]
Epoch:  255/300  Loss:  [0.01535153]
E

In [13]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)
print(predict(x_inputs))
print(decision(x_inputs))

[[0.09557819]
 [0.10251848]
 [0.9003078 ]
 [0.90251733]]
[[0]
 [0]
 [1]
 [1]]


In [14]:
# ignore this, this does not work 
# couldn't find the time to fix it, just submitting my effort
# apologies if the above code is not convienent
"""
class NN(object):
    def __init__(self, hidden_dim=2, learn_rate=0.01):
        self.learn_rate = learn_rate
        self.input_dim = 2
        self.hidden_dim = hidden_dim
        self.output_dim = 1
        # initialize all weights and biases to zeros
#         self.A = np.zeros((self.hidden_dim, self.input_dim))  # input to hidden layer weights
#         self.B = np.zeros(self.hidden_dim)                     # hidden layer to output weights 
#         self.a0 = np.zeros(hidden_dim) # input to hidden layer bias
#         self.b0 = 0 # hidden layer to output bias
        self.A = np.random.normal(0, 1, (self.hidden_dim, self.input_dim))
        self.B = np.random.normal(0, 1, self.hidden_dim)
        self.a0 = np.random.normal(0, 1, self.hidden_dim)
        self.b0 = np.random.normal(0, 1, 1)
        
    def sigmoid(self, t):
        return 1/(1  + np.exp(-t))
    
    def dsigmoid(self, t):
        return self.sigmoid(t)*(1 - self.sigmoid(t))
    
    def hidden_layer(self, x):
        self.z = self.sigmoid(np.dot(self.A, x) + self.a0) 
        return self.z
    
    def forward_pass(self, x):
        self.y_hat = self.sigmoid(np.dot(self.B, self.hidden_layer(x)) + self.b0)
        return self.y_hat
    
    def back_propogate(self, X, Y, Y_hat):
        SSE_a, SSE_b   = np.zeros_like(self.A), np.zeros_like(self.B)
        z_bias, y_bias = np.zeros_like(self.a0), np.zeros_like(self.b0)
        for i, x in enumerate(X):
            z = self.hidden_layer(x)
            y_error = Y[i] - Y_hat[i]
            y_delta = -2* y_error * self.dsigmoid(np.dot(self.B, z) + self.b0)
            s = self.dsigmoid(np.dot(self.A,x) + self.a0) * self.B * y_delta
            SSE_b += y_delta*z
            SSE_a += np.tensordot(x,s, axes=0)
            y_bias += y_delta
            z_bias += s
        # update the weights and biases
        self.A -= self.learn_rate * SSE_a
        self.B -= self.learn_rate * SSE_b
        self.a0 -= self.learn_rate * s
        self.b0 -= self.learn_rate * y_delta
        
    def train(self, x_train, y_train, epochs, shuffle=True): 
        if shuffle:
            indices = np.arange(N)
            np.random.shuffle(indices)
            x_train, y_train = x_train[indices], y_train[indices]

        epoch = 1
        while(epoch <= epochs):
            y_hat = np.array([self.forward_pass(x) for x in x_train])
            self.back_propogate(x_train, y_train, y_hat)
            print('Epoch: ', epoch, 'Loss: ', self.loss(y_train, y_hat))
            epoch += 1

    def loss(self, y_train, y_hat):
        return np.mean((y_train - y_hat)**2)
    
    def predict(self, test_x):
#         if test_x.shape == (2,): test_x = np.reshape(test_x, (1,2))
        y_hats = np.array([self.forward_pass(x) for x in test_x])
        y_outs = int(y_hats > 0.5)
        return y_outs
    
#     def load_model(self):
#         # something here
#     def get_weights(self):
#         # some code here
#     def load_weights(self):
#         # some code here
#     def save_model(self):
#         # some code here
"""

"\nclass NN(object):\n    def __init__(self, hidden_dim=2, learn_rate=0.01):\n        self.learn_rate = learn_rate\n        self.input_dim = 2\n        self.hidden_dim = hidden_dim\n        self.output_dim = 1\n        # initialize all weights and biases to zeros\n#         self.A = np.zeros((self.hidden_dim, self.input_dim))  # input to hidden layer weights\n#         self.B = np.zeros(self.hidden_dim)                     # hidden layer to output weights \n#         self.a0 = np.zeros(hidden_dim) # input to hidden layer bias\n#         self.b0 = 0 # hidden layer to output bias\n        self.A = np.random.normal(0, 1, (self.hidden_dim, self.input_dim))\n        self.B = np.random.normal(0, 1, self.hidden_dim)\n        self.a0 = np.random.normal(0, 1, self.hidden_dim)\n        self.b0 = np.random.normal(0, 1, 1)\n        \n    def sigmoid(self, t):\n        return 1/(1  + np.exp(-t))\n    \n    def dsigmoid(self, t):\n        return self.sigmoid(t)*(1 - self.sigmoid(t))\n    \n    def h