# Basic MLP

In [145]:
import numpy as np

In [146]:
class node: 
    def __init__(self, which, childnodes) -> None:
        self.theta = np.random.random(3)
        if which =='sigmoid':
            self.activation_function = sigmoid
            self.activation_function_derivative = sigmoid_derivative
        elif which == 'relu':
            self.activation_function = relu
            self.activation_function_derivative = relu_derivative
            
        
        self.childnodes = childnodes
        self.parentnodes = []  # for backpropagation
        self.z = 0
        self.return_saved = False
        


    def activation(self):
        if self.return_saved:
            return self.z
        # store childactivations for backpropagation
        self.childactivations = np.array([node.activation() for node in self.childnodes])
        self.a = self._preactivation(self.theta, self.childactivations)
        self.z = self.activation_function(self.a)
        # print('a',self.a, 'z',self.z)
        self.return_saved = True
        return self.z

    def _preactivation(self, theta, x):
        return theta @ x

    def backpropagation(self, alpha):
        # compute error gradient
        self.error_gradient = 0
        for i,parentnode in enumerate(self.parentnodes):
            if parentnode.__class__.__name__ != 'node':  # last layer, where loss is 2(ypred-y)
                self.error_gradient+=parentnode 
            else:
                self.error_gradient+=parentnode.error_gradient *  parentnode.activation_function_derivative(parentnode.z) *  parentnode.get_theta(self)             

        # print('error_gradient',self.error_gradient)
        # compute new weights
        for j in range(len(self.theta)):
            self.theta[j] = self.theta[j] -alpha*self.error_gradient * self.activation_function_derivative(self.z) * self.childnodes[j].z

    def get_theta(self, childnode):
        for i, cn in enumerate(self.childnodes):
            if cn == childnode:
                return self.theta[i]
    
    def undo_return_saved(self):
        self.return_saved = False
        for node in self.childnodes:
            if node.__class__.__name__ == 'node':
                node.undo_return_saved()
    
    
                
class value:
    def __init__(self,value) -> None:
      self.z = value
    
    def activation(self):
        return self.z



def sigmoid(a):
    return 1/(1+np.e**(-a))

def sigmoid_derivative(a):
    return a*(1-a)

def relu(a):
    return max(0,a)

def relu_derivative(a):
    return int(a>0)



### Structure

In [166]:
# hypothesis(node03)
#         /|\    
# bias02 node12 node22  
#         /|\    //|  
# bias01 node11 node21  
#         /|\    //|  
# bias00   x0     x1    

In [147]:
# input
bias00 = value(1)
x0 = value(0)
x1 = value(0)

# first hidden layer
bias01 = value(1)
node11 = node('sigmoid', childnodes=[bias00, x0, x1])
node21 = node('sigmoid', childnodes=[bias00, x0, x1])

# second hidden layer
bias02 = value(1)
node12 = node('sigmoid', childnodes=[bias01, node11, node21])
node22 = node('sigmoid', childnodes=[bias01, node11, node21])

# link back backpropagation
node11.parentnodes = [node12, node22]
node21.parentnodes = [node12, node22]
# output
node03 = node('sigmoid', childnodes=[bias02, node12, node22])
# node13 = node('sigmoid', childnodes=[bias02, node12, node22])

# link back for backpropagation
node12.parentnodes = [node03]
node22.parentnodes = [node03]

def hypothesis(x):
    x0.z = x[0]
    x1.z = x[1]
    
    y0 = node03.activation()
    # y1 = node13.activation(compute_state)

    
    
    # node13.change_compute_state(compute_state)
    # print(y0)
    node03.undo_return_saved()
    return [y0]


# probably should have used this from the start ->
layers = [[node03],[node12,node22], [node11, node21]]

def backpropagation(y, alpha):
    loss_derivative(node03.z, y)
    node03.parentnodes = [loss_derivative(node03.z, y)]
    
    for layer in layers:
        for node in layer:
            node.backpropagation(alpha=0.1)

    # node13.backpropagation()

def loss_derivative(ypred, y):
    return 2*(ypred-y)


In [148]:
for layer in layers:
    print('layer')
    for node in layer:
        print(node.theta)

layer
[0.8154639  0.77799772 0.52839451]
layer
[0.40883463 0.62323347 0.37720334]
[0.36581101 0.0051474  0.91516391]
layer
[0.7011486  0.83811481 0.78013344]
[0.05390138 0.97853241 0.27522573]


## Test XOR


### Forward Phase

In [149]:
X = np.array([[0,0], 
              [0,1],
              [1,0],
              [1,1]])
y = np.array([0,1,1,0])

In [150]:
hypothesis(X[0])

[0.8527216170572637]

### Backward phase

#### Stochastic descent (1 sample)

In [164]:
for i in range(10000):
    idx = np.random.randint(0,len(y))
    hypothesis(X[idx])
    backpropagation(y[idx], alpha=0.8)
    error = sum((hypothesis(X[i])[0]-y[i])**2 for i in range(len(y)))
    print(error)

0.0150539765989051
0.015053195053891782
0.015048854313450825
0.015045136581595647
0.015044118373557534
0.015041340933433164
0.015037198240682385
0.015033500548485932
0.015032480224041155
0.015029073439756404
0.015027966126824222
0.015024845866437677
0.015020889268532412
0.015018114105445603
0.01501647914894256
0.015011436299782263
0.015007065470682555
0.01500514936196189
0.015004574515588738
0.014999578559760307
0.014998910996080021
0.014997843107870962
0.014992651603258078
0.014991990001547625
0.014991674504615634
0.014986074650616462
0.014986021617826696
0.014985833262744816
0.014987030362518867
0.01498002701315242
0.01497353833787629
0.014974385909907316
0.01497624157940381
0.014979071923548339
0.014971151814535988
0.01497428762189668
0.014978354343001734
0.014983321900023001
0.014973717282927082
0.014964645075585172
0.014955746460064822
0.014947715297240479
0.014939873673111054
0.014932557983226822
0.014925757710812844
0.014919703087411
0.014919413472016883
0.014913675019379723
0.0

In [172]:
for i in range(len(y)):
    print(f'X: {X[i]}')
    print(f'y: {y[i]} \ny_pred: {hypothesis(X[i])[0]}\n')


X: [0 0]
y: 0 
y_pred: 0.02801529097677105

X: [0 1]
y: 1 
y_pred: 0.96521603348442

X: [1 0]
y: 1 
y_pred: 0.9648998041481337

X: [1 1]
y: 0 
y_pred: 0.040263703553695275

