In [1]:
import numpy as np

```
x1 -----\
         \
          \
          
          sig(w1 * x1 + w2 * x2 + b) ---- O (L)
          /
         /
x2 -----/        
```

here `O` is the output of the perceptron and `L` is the loss.

## Ref: Sigmoid Gradient
$$\sigma(x) = \frac{1}{1+e^{-x}}$$

$$\frac{\delta \sigma(x)}{\delta x} = \sigma(x)(1 - \sigma(x))$$

NOTE: Other gradients can be easily calculated

---
# Generic class definitions for various operators/nodes

In [2]:
class Node(object):
    """
    Base object for all inputs and outputs.
    """
    def __init__(self, value, grad):
        self.value = value
        self.gradient = grad

In [3]:
class MultiplyNode(object):
    """
    Multiplies two inputs
    """
    def forward(self, x1, x2):
        self.x1 = x1
        self.x2 = x2
        self.output = Node(self.x1.value * self.x2.value, 0)
        return self.output
    
    def backward(self):
        self.x1.gradient = self.x2.value * self.output.gradient
        self.x2.gradient = self.x1.value * self.output.gradient

In [4]:
class AddNode(object):    
    """
    Adds two inputs x1 and x2.
    """
    def forward(self, x1, x2):
        self.x1 = x1
        self.x2 = x2
        self.output = Node(self.x1.value + self.x2.value, 0)
        return self.output
    
    def backward(self):
        self.x1.gradient = 1 * self.output.gradient
        self.x2.gradient = 1 * self.output.gradient

In [5]:
class SigmoidNode(object):
    """
    Adds a sigmoid non-linearity to a single input
    """
    def forward(self, x):
        self.x = x
        self.output = Node(1/(1 + np.exp(-1 * self.x.value)), 0.0)
        return self.output
        
    def backward(self):
        s = 1/(1 + np.exp(-1 * self.x.value))
        self.x.gradient = (s * (1 - s)) * self.output.gradient

---
# Defining the computational graph for a Perceptron

In [6]:
def forward_nn():
    # w1 * x1
    w1x1 = w1_mul_x1.forward(w1, x1)
    # w2 * x2
    w2x2 = w2_mul_x2.forward(w2, x2)
    # w1*x1 + w2*x2
    w1x1_w2x2 = w1x1_add_w2x2.forward(w1x1, w2x2)
    # w1*x1 + w2*x2 + b
    w1x1_w2x2_b = w1x1w2x2_add_b.forward(w1x1_w2x2, b)
    # sigmoid(w1*x1 + w2*x2 + b)
    output = sigmoid_out.forward(w1x1_w2x2_b)
    return output

def backward_nn():
    sigmoid_out.backward()
    w1x1w2x2_add_b.backward()
    w1x1_add_w2x2.backward()
    w2_mul_x2.backward()
    w1_mul_x1.backward()

### Validation

In [18]:
# Initialize Weights and Bias
w1 = Node(0.1, 0.0)
w2 = Node(0.4, 0.0)
b = Node(-0.02, 0.0)

# Input/Target Output
alpha = 0.001
x1 = Node(0.3, 0.0)
x2 = Node(1.0, 0.0)
y = 0.475

# Create Nodes
w1_mul_x1 = MultiplyNode()
w2_mul_x2 = MultiplyNode()
w1x1_add_w2x2 = AddNode()
w1x1w2x2_add_b = AddNode()
sigmoid_out = SigmoidNode()

In [19]:
for i in range(100000):
    forward_output = forward_nn()
    forward_output.gradient = -2 * (y - forward_output.value)
    backward_nn()
    w1.value -= alpha * w1.gradient
    w2.value -= alpha * w2.gradient
    b.value -= alpha * b.gradient
    
forward_output.value

0.47500000000065534

---
# Putting it all together into a single Perceptron

In [6]:
class Perceptron(object):
    global p1_outpu
    def __init__(self, x1, x2, alpha=0.001):
        ### Hyper parameters
        self.alpha = alpha
        ### Initializing weights/bias to a random float between -1 and 1.
        self.w1 = Node(np.random.uniform(-1, 1), 0.0)
        self.w2 = Node(np.random.uniform(-1, 1), 0.0)
        self.b = Node(np.random.uniform(-1, 1), 0.0)
        ### Input and Output variables
        self.x1 = Node(x1, 0.0)
        self.x2 = Node(x2, 0.0)
        ### Initialize operators nodes required 
        ### for processing the inputs within a perceptron
        self.initialize_operators()
    
    def initialize_operators(self):
        self.w1_mul_x1 = MultiplyNode()
        self.w2_mul_x2 = MultiplyNode()
        self.w1x1_add_w2x2 = AddNode()
        self.w1x1_w2x2_add_b = AddNode()
        self.sigmoid = SigmoidNode()
    
    def forward(self):
        w1x1 = self.w1_mul_x1.forward(self.w1, self.x1)
        w2x2 = self.w2_mul_x2.forward(self.w2, self.x2)
        w1x1_w2x2 = self.w1x1_add_w2x2.forward(w1x1, w2x2)
        w1x1_w2x2_b = self.w1x1_w2x2_add_b.forward(w1x1_w2x2, self.b)
        self.sigmoid.forward(w1x1_w2x2_b)
        #print("sigmoid.output = ", self.sigmoid.output.value)
        
    def backward(self):
        self.sigmoid.backward()
        self.w1x1_w2x2_add_b.backward()
        self.w1x1_add_w2x2.backward()
        self.w2_mul_x2.backward()
        self.w1_mul_x1.backward()
    
    def update(self):
        self.w1.value -= self.alpha * self.w1.gradient
        self.w2.value -= self.alpha * self.w2.gradient
        self.b.value -= self.alpha * self.b.gradient
        print("w1 = ", self.w1.value," w2= ",self.w2.value)
        #print("w2 = ", self.w2.value)

In [7]:
class Perceptron2(object):
    def __init__(self, p_output, alpha=0.001):
        ### Hyper parameters
        self.alpha = alpha
        ### Initializing weights/bias to a random float between -1 and 1.
        self.w3 = Node(np.random.uniform(-1, 1), 0.0)
        self.b = Node(np.random.uniform(-1, 1), 0.0)
        ### Input and Output variables
        self.p_output = Node(p_output, 0.0)
        ### Initialize operators nodes required 
        ### for processing the inputs within a perceptron
        self.initialize_operators()
    
    def initialize_operators(self):
        self.w3_mul_p_output = MultiplyNode()
        self.w3_mul_p_output_add_b = AddNode()
        self.sigmoid = SigmoidNode()
    
    def forward(self):
        w3p1 = self.w3_mul_p_output.forward(self.w3, self.p_output)
        w3p1_b = self.w3_mul_p_output_add_b.forward(w3p1, self.b)
        self.sigmoid.forward(w3p1_b)
        #print("sigmoid.output = ", self.sigmoid.output.value)
        
    def backward(self):
        self.sigmoid.backward()
        self.w3_mul_p_output_add_b.backward()
        self.w3_mul_p_output.backward()
    
    def update(self):
        self.w3.value -= self.alpha * self.w3.gradient
        self.b.value -= self.alpha * self.b.gradient
        
        print "                                                               w3 = ", self.w3.value

### Validation

In [8]:
p = Perceptron(0.11, -1.0, alpha=0.1)
# number of iterations
N = 10000
# expected output 
target = 0.3481972639817
flag = False
print flag
for i in range(N):
    # Step 1. Forward Pass for perceptron P1
    p.forward()
    
    # Step 2. Initializes object of P2 perceptron (uses if condition and flag variable to create P2 object only once)
    if flag == False:
        p2 = Perceptron2(p.sigmoid.output.value, alpha=0.1)
        flag = True
        
    # Step 3. Forward Pass for perceptron P2
    p2.forward()
    
    # Step 4. For P2 - 1.Calculate Loss 2.Backpropogate 3.Update weight
    #-2 * (y - output) is the gradient of output w.r.t square loss function.
    p2.sigmoid.output.gradient = -2 * (target - p2.sigmoid.output.value)
    p2.backward()
    p2.update()
    
    # Step 5. For P1 - 1.Calculate Loss 2.Backpropogate 3.Update weight
    #-2 * (y - output) is the gradient of output w.r.t square loss function.
    p.sigmoid.output.gradient = -2 * (target - p.sigmoid.output.value)
    p.backward()
    p.update()

p2.sigmoid.output.value

False
                                                               w3 =  0.874333091135
('w1 = ', 0.96449060695648758, ' w2= ', 0.24881898359335128)
                                                               w3 =  0.87074156239
('w1 = ', 0.96477549637326931, ' w2= ', 0.24622907980442663)
                                                               w3 =  0.867182407814
('w1 = ', 0.96505625610485501, ' w2= ', 0.24367671860819307)
                                                               w3 =  0.863656013266
('w1 = ', 0.96533291304022506, ' w2= ', 0.24116165555937447)
                                                               w3 =  0.860162733864
('w1 = ', 0.96560549513532357, ' w2= ', 0.23868363651302427)
                                                               w3 =  0.856702894266
('w1 = ', 0.96587403136438366, ' w2= ', 0.23624239806702357)
                                                               w3 =  0.853276789013
('w1 = ', 0.96613855167159302, ' w2= ', 0

                                                               w3 =  0.592990922661
('w1 = ', 0.98057601478297318, ' w2= ', 0.10258800335257857)
                                                               w3 =  0.592910275147
('w1 = ', 0.9805766708048318, ' w2= ', 0.10258203951750004)
                                                               w3 =  0.592830547008
('w1 = ', 0.98057731323560604, ' w2= ', 0.10257619923773388)
                                                               w3 =  0.592751727525
('w1 = ', 0.98057794235672413, ' w2= ', 0.10257047995484241)
                                                               w3 =  0.59267380611
('w1 = ', 0.9805785584437926, ' w2= ', 0.10256487916331131)
                                                               w3 =  0.592596772303
('w1 = ', 0.98057916176671656, ' w2= ', 0.10255939440945704)
                                                               w3 =  0.59252061577
('w1 = ', 0.98057975258981755, ' w2= ', 0.10255402

                                                               w3 =  0.585837033892
('w1 = ', 0.98060767178948605, ' w2= ', 0.10230021238427553)
                                                               w3 =  0.585836986238
('w1 = ', 0.98060767179010988, ' w2= ', 0.10230021237860409)
                                                               w3 =  0.585836939115
('w1 = ', 0.98060767179072084, ' w2= ', 0.10230021237305022)
                                                               w3 =  0.585836892517
('w1 = ', 0.98060767179131914, ' w2= ', 0.10230021236761146)
                                                               w3 =  0.585836846437
('w1 = ', 0.980607671791905, ' w2= ', 0.10230021236228544)
                                                               w3 =  0.58583680087
('w1 = ', 0.98060767179247876, ' w2= ', 0.10230021235706982)
                                                               w3 =  0.585836755811
('w1 = ', 0.98060767179304054, ' w2= ', 0.1023002

                                                               w3 =  0.585832754081
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066528)
                                                               w3 =  0.585832754062
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066527)
                                                               w3 =  0.585832754043
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066526)
                                                               w3 =  0.585832754025
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066524)
                                                               w3 =  0.585832754006
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066523)
                                                               w3 =  0.585832753988
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066521)
                                                               w3 =  0.58583275397
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230

('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  

                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.1023

('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  

('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  

                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.1023

                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.1023

                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.1023

                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.1023

                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.1023

                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.1023

('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  

('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  

                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.10230021211066442)
                                                               w3 =  0.585832752368
('w1 = ', 0.98060767181958131, ' w2= ', 0.1023

0.34819726398170114

---

# Assignment 3

```
X1 ------\
          \
          (P) ---- (P) ---- (T)
          /
X2 ------/
         
```

1. Create a two layer neural network with one perceptron in each layer (see Diagram above). Write a validation code that does along with your implementation. The goal of network is to optimize the two perceptrons to produce the output target `T` given the inputs `X1` and `X2`. Assume the output `O` of each perceptron is

$$ O = \sigma{(w1*x1 + w2*x2 + b)} $$ where

$$\sigma(x) = \frac{1}{1+e^{-x}}$$

Feel free to change the loss function if you like.

