# Neural Network

Implement the back-propagation algorithm to learn the weights of a perceptron with 2 input nodes, 2 hidden nodes and 1 output node.

## XOR

Note: **Python3** in used.

In [61]:
# imports
import numpy as np

In [73]:
# parameters - input all parameter values here
# note: Unless a cell is found with changed values, the training cells the parameters here 
input_dim = 2
hidden_dim = 2 # dimensions of hidden layers
std = 0.005  # train data noise standard deviation
w_std = 0.5
learn_rate = 0.005

In [82]:
# prepare training data
x_inputs = np.array([np.zeros(2), np.ones(2), np.array([1,0]), np.array([0,1])])
def generate_trainset(N):
    X = np.repeat(x_inputs, N//4, axis=0)
    y_xor = np.logical_xor(X.T[0], X.T[1]).astype(np.float)
    # add noise to data
    X += np.random.normal(0, std, X.shape)
    y_xor += np.random.normal(0, std, N)
    # shuffle the training data
    indices = np.arange(N)
    np.random.shuffle(indices)
    x_train, y_train = X[indices], y_xor[indices]
    return x_train, y_train

In [83]:
def sigmoid( t):
    return 1/(1  + np.exp(-t))

def dsigmoid( t):
    return sigmoid(t)*(1 - sigmoid(t))

######  Experiment with N = 1000
This gives good performance. 

In [84]:
N = 1000
x_train, y_train = generate_trainset(N)

In [85]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))# [1,1], [1,0], [0,0], [0,1]
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 500 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N)   

Epoch:  1/500  Loss:  [0.43267311]
Epoch:  2/500  Loss:  [0.41004883]
Epoch:  3/500  Loss:  [0.37495991]
Epoch:  4/500  Loss:  [0.32487866]
Epoch:  5/500  Loss:  [0.27529686]
Epoch:  6/500  Loss:  [0.25544857]
Epoch:  7/500  Loss:  [0.2533296]
Epoch:  8/500  Loss:  [0.25305454]
Epoch:  9/500  Loss:  [0.25285668]
Epoch:  10/500  Loss:  [0.25267361]
Epoch:  11/500  Loss:  [0.25250291]
Epoch:  12/500  Loss:  [0.25234365]
Epoch:  13/500  Loss:  [0.25219496]
Epoch:  14/500  Loss:  [0.252056]
Epoch:  15/500  Loss:  [0.25192603]
Epoch:  16/500  Loss:  [0.25180436]
Epoch:  17/500  Loss:  [0.25169036]
Epoch:  18/500  Loss:  [0.25158345]
Epoch:  19/500  Loss:  [0.25148309]
Epoch:  20/500  Loss:  [0.25138881]
Epoch:  21/500  Loss:  [0.25130014]
Epoch:  22/500  Loss:  [0.25121667]
Epoch:  23/500  Loss:  [0.25113803]
Epoch:  24/500  Loss:  [0.25106385]
Epoch:  25/500  Loss:  [0.25099382]
Epoch:  26/500  Loss:  [0.25092763]
Epoch:  27/500  Loss:  [0.25086501]
Epoch:  28/500  Loss:  [0.25080568]
Epoc

Epoch:  255/500  Loss:  [0.10512106]
Epoch:  256/500  Loss:  [0.10300485]
Epoch:  257/500  Loss:  [0.10087121]
Epoch:  258/500  Loss:  [0.09872486]
Epoch:  259/500  Loss:  [0.09657069]
Epoch:  260/500  Loss:  [0.09441372]
Epoch:  261/500  Loss:  [0.09225897]
Epoch:  262/500  Loss:  [0.09011147]
Epoch:  263/500  Loss:  [0.08797613]
Epoch:  264/500  Loss:  [0.0858577]
Epoch:  265/500  Loss:  [0.08376072]
Epoch:  266/500  Loss:  [0.08168943]
Epoch:  267/500  Loss:  [0.07964779]
Epoch:  268/500  Loss:  [0.07763939]
Epoch:  269/500  Loss:  [0.07566747]
Epoch:  270/500  Loss:  [0.07373484]
Epoch:  271/500  Loss:  [0.07184397]
Epoch:  272/500  Loss:  [0.0699969]
Epoch:  273/500  Loss:  [0.06819529]
Epoch:  274/500  Loss:  [0.06644045]
Epoch:  275/500  Loss:  [0.06473332]
Epoch:  276/500  Loss:  [0.06307453]
Epoch:  277/500  Loss:  [0.06146438]
Epoch:  278/500  Loss:  [0.05990293]
Epoch:  279/500  Loss:  [0.05838999]
Epoch:  280/500  Loss:  [0.05692513]
Epoch:  281/500  Loss:  [0.05550775]
Epo

In [86]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)
print(predict(x_inputs))
print(decision(x_inputs))

[[0.10077908]
 [0.05632131]
 [0.92554713]
 [0.9188792 ]]
[[0]
 [0]
 [1]
 [1]]


###### Experiment with N = 100

In [54]:
N = 100
x_train, y_train = generate_trainset(N)

In [55]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 1000 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N)   

Epoch:  1/1000  Loss:  [0.26374113]
Epoch:  2/1000  Loss:  [0.26211801]
Epoch:  3/1000  Loss:  [0.26067316]
Epoch:  4/1000  Loss:  [0.25939107]
Epoch:  5/1000  Loss:  [0.2582568]
Epoch:  6/1000  Loss:  [0.25725609]
Epoch:  7/1000  Loss:  [0.25637555]
Epoch:  8/1000  Loss:  [0.25560266]
Epoch:  9/1000  Loss:  [0.25492585]
Epoch:  10/1000  Loss:  [0.25433452]
Epoch:  11/1000  Loss:  [0.25381899]
Epoch:  12/1000  Loss:  [0.25337049]
Epoch:  13/1000  Loss:  [0.25298111]
Epoch:  14/1000  Loss:  [0.25264376]
Epoch:  15/1000  Loss:  [0.25235207]
Epoch:  16/1000  Loss:  [0.25210039]
Epoch:  17/1000  Loss:  [0.25188371]
Epoch:  18/1000  Loss:  [0.25169756]
Epoch:  19/1000  Loss:  [0.25153801]
Epoch:  20/1000  Loss:  [0.25140162]
Epoch:  21/1000  Loss:  [0.25128532]
Epoch:  22/1000  Loss:  [0.25118644]
Epoch:  23/1000  Loss:  [0.25110265]
Epoch:  24/1000  Loss:  [0.2510319]
Epoch:  25/1000  Loss:  [0.25097239]
Epoch:  26/1000  Loss:  [0.25092257]
Epoch:  27/1000  Loss:  [0.25088108]
Epoch:  28/1

Epoch:  225/1000  Loss:  [0.25109063]
Epoch:  226/1000  Loss:  [0.25109287]
Epoch:  227/1000  Loss:  [0.2510951]
Epoch:  228/1000  Loss:  [0.25109733]
Epoch:  229/1000  Loss:  [0.25109956]
Epoch:  230/1000  Loss:  [0.25110179]
Epoch:  231/1000  Loss:  [0.25110402]
Epoch:  232/1000  Loss:  [0.25110625]
Epoch:  233/1000  Loss:  [0.25110848]
Epoch:  234/1000  Loss:  [0.2511107]
Epoch:  235/1000  Loss:  [0.25111292]
Epoch:  236/1000  Loss:  [0.25111515]
Epoch:  237/1000  Loss:  [0.25111736]
Epoch:  238/1000  Loss:  [0.25111958]
Epoch:  239/1000  Loss:  [0.25112179]
Epoch:  240/1000  Loss:  [0.25112401]
Epoch:  241/1000  Loss:  [0.25112621]
Epoch:  242/1000  Loss:  [0.25112842]
Epoch:  243/1000  Loss:  [0.25113062]
Epoch:  244/1000  Loss:  [0.25113282]
Epoch:  245/1000  Loss:  [0.25113502]
Epoch:  246/1000  Loss:  [0.25113721]
Epoch:  247/1000  Loss:  [0.2511394]
Epoch:  248/1000  Loss:  [0.25114158]
Epoch:  249/1000  Loss:  [0.25114376]
Epoch:  250/1000  Loss:  [0.25114594]
Epoch:  251/100

Epoch:  478/1000  Loss:  [0.25128617]
Epoch:  479/1000  Loss:  [0.25128481]
Epoch:  480/1000  Loss:  [0.25128343]
Epoch:  481/1000  Loss:  [0.25128205]
Epoch:  482/1000  Loss:  [0.25128065]
Epoch:  483/1000  Loss:  [0.25127923]
Epoch:  484/1000  Loss:  [0.25127781]
Epoch:  485/1000  Loss:  [0.25127637]
Epoch:  486/1000  Loss:  [0.25127492]
Epoch:  487/1000  Loss:  [0.25127345]
Epoch:  488/1000  Loss:  [0.25127198]
Epoch:  489/1000  Loss:  [0.25127049]
Epoch:  490/1000  Loss:  [0.25126899]
Epoch:  491/1000  Loss:  [0.25126747]
Epoch:  492/1000  Loss:  [0.25126595]
Epoch:  493/1000  Loss:  [0.25126442]
Epoch:  494/1000  Loss:  [0.25126287]
Epoch:  495/1000  Loss:  [0.25126131]
Epoch:  496/1000  Loss:  [0.25125974]
Epoch:  497/1000  Loss:  [0.25125816]
Epoch:  498/1000  Loss:  [0.25125657]
Epoch:  499/1000  Loss:  [0.25125497]
Epoch:  500/1000  Loss:  [0.25125336]
Epoch:  501/1000  Loss:  [0.25125174]
Epoch:  502/1000  Loss:  [0.25125011]
Epoch:  503/1000  Loss:  [0.25124847]
Epoch:  504/

Epoch:  704/1000  Loss:  [0.25094675]
Epoch:  705/1000  Loss:  [0.25094624]
Epoch:  706/1000  Loss:  [0.25094575]
Epoch:  707/1000  Loss:  [0.25094527]
Epoch:  708/1000  Loss:  [0.2509448]
Epoch:  709/1000  Loss:  [0.25094435]
Epoch:  710/1000  Loss:  [0.25094391]
Epoch:  711/1000  Loss:  [0.25094349]
Epoch:  712/1000  Loss:  [0.25094308]
Epoch:  713/1000  Loss:  [0.25094269]
Epoch:  714/1000  Loss:  [0.2509423]
Epoch:  715/1000  Loss:  [0.25094194]
Epoch:  716/1000  Loss:  [0.25094159]
Epoch:  717/1000  Loss:  [0.25094125]
Epoch:  718/1000  Loss:  [0.25094093]
Epoch:  719/1000  Loss:  [0.25094062]
Epoch:  720/1000  Loss:  [0.25094032]
Epoch:  721/1000  Loss:  [0.25094004]
Epoch:  722/1000  Loss:  [0.25093977]
Epoch:  723/1000  Loss:  [0.25093952]
Epoch:  724/1000  Loss:  [0.25093928]
Epoch:  725/1000  Loss:  [0.25093906]
Epoch:  726/1000  Loss:  [0.25093885]
Epoch:  727/1000  Loss:  [0.25093865]
Epoch:  728/1000  Loss:  [0.25093847]
Epoch:  729/1000  Loss:  [0.2509383]
Epoch:  730/100

Epoch:  926/1000  Loss:  [0.25103006]
Epoch:  927/1000  Loss:  [0.25102885]
Epoch:  928/1000  Loss:  [0.25102755]
Epoch:  929/1000  Loss:  [0.25102616]
Epoch:  930/1000  Loss:  [0.25102468]
Epoch:  931/1000  Loss:  [0.25102311]
Epoch:  932/1000  Loss:  [0.25102144]
Epoch:  933/1000  Loss:  [0.25101968]
Epoch:  934/1000  Loss:  [0.25101781]
Epoch:  935/1000  Loss:  [0.25101583]
Epoch:  936/1000  Loss:  [0.25101375]
Epoch:  937/1000  Loss:  [0.25101155]
Epoch:  938/1000  Loss:  [0.25100923]
Epoch:  939/1000  Loss:  [0.25100679]
Epoch:  940/1000  Loss:  [0.25100423]
Epoch:  941/1000  Loss:  [0.25100154]
Epoch:  942/1000  Loss:  [0.25099871]
Epoch:  943/1000  Loss:  [0.25099575]
Epoch:  944/1000  Loss:  [0.25099264]
Epoch:  945/1000  Loss:  [0.25098939]
Epoch:  946/1000  Loss:  [0.25098598]
Epoch:  947/1000  Loss:  [0.25098242]
Epoch:  948/1000  Loss:  [0.25097869]
Epoch:  949/1000  Loss:  [0.25097479]
Epoch:  950/1000  Loss:  [0.25097071]
Epoch:  951/1000  Loss:  [0.25096646]
Epoch:  952/

In [56]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)
print(predict(x_inputs))
print(decision(x_inputs))

[[0.42433357]
 [0.55641165]
 [0.49047291]
 [0.50132611]]
[[0]
 [1]
 [0]
 [1]]


###### experiment with 6 hidden nodes and N = 1000
This gives good performance. 

In [78]:
N = 1000
x_train, y_train = generate_trainset(N)
hidden_dim = 6

In [80]:
# initialize weights
A  = np.random.normal(0, w_std, (hidden_dim, input_dim))
a0 = np.random.normal(0, w_std, hidden_dim)
b0 = np.random.normal(0, w_std, 1)
B  = np.random.normal(0, w_std, hidden_dim)
epochs = 300 # number of itrations
for epoch in range(epochs):
    dSSE_a, dSSE_b, z_bias, y_bias = np.zeros_like(A), np.zeros_like(B), np.zeros_like(B), 0
    loss = 0
    for i, x in enumerate(x_train):
        z = sigmoid(np.dot(A,x)+a0)
        y_hat = sigmoid(np.dot(B,z)+b0)
        y_error = y_hat - y_train[i]
        y_delta = 2* y_error * dsigmoid(np.dot(B, z) + b0)
        s = dsigmoid(np.dot(A,x) + a0) * B * y_delta
        # print(s.shape)
        dSSE_b += y_delta*z
        dSSE_a += np.tensordot(s,x, axes=0)
        # print(dSSE_a.shape)
        y_bias += y_delta
        z_bias += s
        loss += y_error**2

    A  = A - learn_rate * dSSE_a
    B  = B - learn_rate * dSSE_b
    a0 = a0 - learn_rate * s
    b0 = b0 - learn_rate * y_delta

    print('Epoch: ', str(epoch+1) + '/'+str(epochs), ' Loss: ', loss/N)   

Epoch:  1/300  Loss:  [0.25050003]
Epoch:  2/300  Loss:  [0.25035521]
Epoch:  3/300  Loss:  [0.25029759]
Epoch:  4/300  Loss:  [0.2502416]
Epoch:  5/300  Loss:  [0.25018718]
Epoch:  6/300  Loss:  [0.2501341]
Epoch:  7/300  Loss:  [0.25008217]
Epoch:  8/300  Loss:  [0.25003121]
Epoch:  9/300  Loss:  [0.24998106]
Epoch:  10/300  Loss:  [0.24993154]
Epoch:  11/300  Loss:  [0.2498825]
Epoch:  12/300  Loss:  [0.24983378]
Epoch:  13/300  Loss:  [0.24978524]
Epoch:  14/300  Loss:  [0.24973671]
Epoch:  15/300  Loss:  [0.24968807]
Epoch:  16/300  Loss:  [0.24963916]
Epoch:  17/300  Loss:  [0.24958986]
Epoch:  18/300  Loss:  [0.24954002]
Epoch:  19/300  Loss:  [0.2494895]
Epoch:  20/300  Loss:  [0.24943818]
Epoch:  21/300  Loss:  [0.2493859]
Epoch:  22/300  Loss:  [0.24933254]
Epoch:  23/300  Loss:  [0.24927796]
Epoch:  24/300  Loss:  [0.24922202]
Epoch:  25/300  Loss:  [0.24916458]
Epoch:  26/300  Loss:  [0.24910548]
Epoch:  27/300  Loss:  [0.24904459]
Epoch:  28/300  Loss:  [0.24898175]
Epoch:

Epoch:  242/300  Loss:  [0.00925726]
Epoch:  243/300  Loss:  [0.00913734]
Epoch:  244/300  Loss:  [0.00902]
Epoch:  245/300  Loss:  [0.00890518]
Epoch:  246/300  Loss:  [0.0087928]
Epoch:  247/300  Loss:  [0.0086828]
Epoch:  248/300  Loss:  [0.00857511]
Epoch:  249/300  Loss:  [0.00846967]
Epoch:  250/300  Loss:  [0.00836642]
Epoch:  251/300  Loss:  [0.0082653]
Epoch:  252/300  Loss:  [0.00816625]
Epoch:  253/300  Loss:  [0.00806922]
Epoch:  254/300  Loss:  [0.00797415]
Epoch:  255/300  Loss:  [0.00788099]
Epoch:  256/300  Loss:  [0.00778969]
Epoch:  257/300  Loss:  [0.00770021]
Epoch:  258/300  Loss:  [0.00761249]
Epoch:  259/300  Loss:  [0.00752649]
Epoch:  260/300  Loss:  [0.00744216]
Epoch:  261/300  Loss:  [0.00735947]
Epoch:  262/300  Loss:  [0.00727837]
Epoch:  263/300  Loss:  [0.00719881]
Epoch:  264/300  Loss:  [0.00712077]
Epoch:  265/300  Loss:  [0.0070442]
Epoch:  266/300  Loss:  [0.00696907]
Epoch:  267/300  Loss:  [0.00689533]
Epoch:  268/300  Loss:  [0.00682296]
Epoch:  

In [81]:
def predict(x_test):
    results =  [sigmoid(np.dot(B, sigmoid(np.dot(A, x)+a0)) + b0) for x in x_test]
    return np.array(results)
def decision(x_test):
    return (predict(x_test) > 0.5).astype(int)
print(predict(x_inputs))
print(decision(x_inputs))

[[0.07929603]
 [0.06154217]
 [0.93455219]
 [0.92543788]]
[[0]
 [0]
 [1]
 [1]]


In [57]:
# ignore this, this does not work 
# couldn't find the time to fix it, just submitting my effort
# apologies if the above code is not convienent
"""
class NN(object):
    def __init__(self, hidden_dim=2, learn_rate=0.01):
        self.learn_rate = learn_rate
        self.input_dim = 2
        self.hidden_dim = hidden_dim
        self.output_dim = 1
        # initialize all weights and biases to zeros
#         self.A = np.zeros((self.hidden_dim, self.input_dim))  # input to hidden layer weights
#         self.B = np.zeros(self.hidden_dim)                     # hidden layer to output weights 
#         self.a0 = np.zeros(hidden_dim) # input to hidden layer bias
#         self.b0 = 0 # hidden layer to output bias
        self.A = np.random.normal(0, 1, (self.hidden_dim, self.input_dim))
        self.B = np.random.normal(0, 1, self.hidden_dim)
        self.a0 = np.random.normal(0, 1, self.hidden_dim)
        self.b0 = np.random.normal(0, 1, 1)
        
    def sigmoid(self, t):
        return 1/(1  + np.exp(-t))
    
    def dsigmoid(self, t):
        return self.sigmoid(t)*(1 - self.sigmoid(t))
    
    def hidden_layer(self, x):
        self.z = self.sigmoid(np.dot(self.A, x) + self.a0) 
        return self.z
    
    def forward_pass(self, x):
        self.y_hat = self.sigmoid(np.dot(self.B, self.hidden_layer(x)) + self.b0)
        return self.y_hat
    
    def back_propogate(self, X, Y, Y_hat):
        SSE_a, SSE_b   = np.zeros_like(self.A), np.zeros_like(self.B)
        z_bias, y_bias = np.zeros_like(self.a0), np.zeros_like(self.b0)
        for i, x in enumerate(X):
            z = self.hidden_layer(x)
            y_error = Y[i] - Y_hat[i]
            y_delta = -2* y_error * self.dsigmoid(np.dot(self.B, z) + self.b0)
            s = self.dsigmoid(np.dot(self.A,x) + self.a0) * self.B * y_delta
            SSE_b += y_delta*z
            SSE_a += np.tensordot(x,s, axes=0)
            y_bias += y_delta
            z_bias += s
        # update the weights and biases
        self.A -= self.learn_rate * SSE_a
        self.B -= self.learn_rate * SSE_b
        self.a0 -= self.learn_rate * s
        self.b0 -= self.learn_rate * y_delta
        
    def train(self, x_train, y_train, epochs, shuffle=True): 
        if shuffle:
            indices = np.arange(N)
            np.random.shuffle(indices)
            x_train, y_train = x_train[indices], y_train[indices]

        epoch = 1
        while(epoch <= epochs):
            y_hat = np.array([self.forward_pass(x) for x in x_train])
            self.back_propogate(x_train, y_train, y_hat)
            print('Epoch: ', epoch, 'Loss: ', self.loss(y_train, y_hat))
            epoch += 1

    def loss(self, y_train, y_hat):
        return np.mean((y_train - y_hat)**2)
    
    def predict(self, test_x):
#         if test_x.shape == (2,): test_x = np.reshape(test_x, (1,2))
        y_hats = np.array([self.forward_pass(x) for x in test_x])
        y_outs = int(y_hats > 0.5)
        return y_outs
    
#     def load_model(self):
#         # something here
#     def get_weights(self):
#         # some code here
#     def load_weights(self):
#         # some code here
#     def save_model(self):
#         # some code here
"""

"\nclass NN(object):\n    def __init__(self, hidden_dim=2, learn_rate=0.01):\n        self.learn_rate = learn_rate\n        self.input_dim = 2\n        self.hidden_dim = hidden_dim\n        self.output_dim = 1\n        # initialize all weights and biases to zeros\n#         self.A = np.zeros((self.hidden_dim, self.input_dim))  # input to hidden layer weights\n#         self.B = np.zeros(self.hidden_dim)                     # hidden layer to output weights \n#         self.a0 = np.zeros(hidden_dim) # input to hidden layer bias\n#         self.b0 = 0 # hidden layer to output bias\n        self.A = np.random.normal(0, 1, (self.hidden_dim, self.input_dim))\n        self.B = np.random.normal(0, 1, self.hidden_dim)\n        self.a0 = np.random.normal(0, 1, self.hidden_dim)\n        self.b0 = np.random.normal(0, 1, 1)\n        \n    def sigmoid(self, t):\n        return 1/(1  + np.exp(-t))\n    \n    def dsigmoid(self, t):\n        return self.sigmoid(t)*(1 - self.sigmoid(t))\n    \n    def h