In [1]:
import random
import numpy as np
random.seed(777)


### Toy data generation

In [2]:
# Input and Target value
data = [
    [[0, 0], [0]],
    [[0, 1], [1]],
    [[1, 0], [1]],
    [[1, 1], [0]]
]

### Environmental settings

In [3]:
iterations=5000  # Iterations
lr=0.1  # learning rate 
mo=0.4  # momentum coefficient


### Activation functions and weight matrix

In [4]:
# Sigmoid
def sigmoid(x, derivative=False):
    if (derivative == True): # Each return value when differentiating or not
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

# Hyperbolic tangent
def tanh(x, derivative=False): 
    if (derivative == True): # Each return value when differentiating or not
        return 1 - x ** 2
    return np.tanh(x)

# Function to initialize weight matrices
def makeMatrix(i, j, fill=0.0):
    mat = []
    for i in range(i):
        mat.append([fill] * j)
    return mat


In [19]:
# Run neural network
class NeuralNetwork:

    # Initialize models
    def __init__(self, num_x, num_yh, num_yo, bias=1):

        # Input(num_x), Hidden(num_yh), output(num_yo), bias
        self.num_x = num_x + bias  
        self.num_yh = num_yh
        self.num_yo = num_yo

        # Initialize activation function
        self.activation_input = [1.0] * self.num_x
        self.activation_hidden = [1.0] * self.num_yh
        self.activation_out = [1.0] * self.num_yo

        # weight matrices input
        self.weight_in = makeMatrix(self.num_x, self.num_yh)
        for i in range(self.num_x):
            for j in range(self.num_yh):
                self.weight_in[i][j] = random.random()

        # weight matrices output
        self.weight_out = makeMatrix(self.num_yh, self.num_yo)
        for j in range(self.num_yh):
            for k in range(self.num_yo):
                self.weight_out[j][k] = random.random()

        # Weighted Initial Values for Momentum SGD
        self.gradient_in = makeMatrix(self.num_x, self.num_yh)
        self.gradient_out = makeMatrix(self.num_yh, self.num_yo)

        
        
    # forward propagation 
    def update(self, inputs):

        # activation function for input layer
        for i in range(self.num_x - 1):
            self.activation_input[i] = inputs[i]

        # activation function for hidden layer
        for j in range(self.num_yh):
            sum = 0.0
            for i in range(self.num_x):
                sum = sum + self.activation_input[i] * self.weight_in[i][j]
           # selecting activation functions between sigmoid and tanh, differentiation
            self.activation_hidden[j] = sigmoid(sum, False)

        # activation function for output layer
        for k in range(self.num_yo):
            sum = 0.0
            for j in range(self.num_yh):
                sum = sum + self.activation_hidden[j] * self.weight_out[j][k]
            
            # selecting activation functions between sigmoid and tanh
            self.activation_out[k] = sigmoid(sum, False)

        return self.activation_out[:]
    
    
    
    # running backpropagation
    def backPropagate(self, targets):

        # output delta 
        output_deltas = [0.0] * self.num_yo
        for k in range(self.num_yo):
            error = targets[k] - self.activation_out[k]
            # selecting activation functions between sigmoid and tanh, differentiation
            output_deltas[k] = sigmoid(self.activation_out[k], True) * error

        # hidden node delta
        hidden_deltas = [0.0] * self.num_yh
        for j in range(self.num_yh):
            error = 0.0
            for k in range(self.num_yo):
                error = error + output_deltas[k] * self.weight_out[j][k]
                # selecting activation functions between sigmoid and tanh, differentiation
            hidden_deltas[j] = sigmoid(self.activation_hidden[j], True) * error

        # Update output weights
        for j in range(self.num_yh):
            for k in range(self.num_yo):
                gradient = output_deltas[k] * self.activation_hidden[j]
                v = mo * self.gradient_out[j][k] - lr * gradient
                self.weight_out[j][k] += v
                self.gradient_out[j][k] = gradient

        # Input weight update
        for i in range(self.num_x):
            for j in range(self.num_yh):
                gradient = hidden_deltas[j] * self.activation_input[i]
                v = mo*self.gradient_in[i][j] - lr * gradient
                self.weight_in[i][j] += v
                self.gradient_in[i][j] = gradient

        # Calculation of error (least squares method)
        error = 0.0
        for k in range(len(targets)):
            error = error + 0.5 * (targets[k] - self.activation_out[k]) ** 2
        return error

    
    
    # Doing training
    def train(self, patterns):
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.update(inputs)
                error = error + self.backPropagate(targets)
                
            if i % 500 == 0:
                print('error: %-.5f' % error)
                
    
    # Printing results
    def result(self, patterns):
        for p in patterns:
            print('Input: %s, Predict: %s' % (p[0], self.update(p[0])))

In [21]:
# model instantiation
n = NeuralNetwork(2, 3, 1)


# Doing training
n.train(data)

# Result
n.result(data)

0.3651267673345187
0.14691586391989978
0.10540076479377652
0.021859748142623345
0.009285721753224596
0.005609337192869759
0.003956390321656173
0.0030351826165898707
0.002453120635427478
0.0020539851553678964
Input: [0, 0], Predict: [0.017732820819339936]
Input: [0, 1], Predict: [0.9510075898232156]
Input: [1, 0], Predict: [0.9499517731360895]
Input: [1, 1], Predict: [0.05958406581748622]


### Breaking down the code to understand it
####  Initialize models

In [5]:
num_x = 2
num_yh = 2
num_yo = 1
bias = 1

# Size: input(num_x), Hidden(num_yh), output(num_yo), bias
num_x = num_x + bias  
num_yh = num_yh
num_yo = num_yo

# Initialize activation function
activation_input = [1.0] * num_x
activation_hidden = [1.0] * num_yh
activation_out = [1.0] * num_yo

# weight matrices input
weight_in = makeMatrix(num_x, num_yh)
for i in range(num_x):
    for j in range(num_yh):
        weight_in[i][j] = random.random()

        
# weight matrices output
weight_out = makeMatrix(num_yh, num_yo)
for j in range(num_yh):
    for k in range(num_yo):
        weight_out[j][k] = random.random()
        
        
# Weighted Initial Values for Momentum SGD
gradient_in = makeMatrix(num_x, num_yh)
gradient_out = makeMatrix(num_yh, num_yo)

####  Update parameters

In [6]:
for d in data:
    inputs = d[0]

    # activation function for input layer
    for i in range(num_x - 1):
        activation_input[i] = inputs[i]
        
    # activation function for hidden layer
    for j in range(num_yh):
        sum = 0.0
        for i in range(num_x):
            sum = sum + activation_input[i] * weight_in[i][j]
        # selecting activation functions between sigmoid and tanh, differentiation
        activation_hidden[j] = sigmoid(sum, False)
        
        
    # activation function for output layer
    for k in range(num_yo):
        sum = 0.0
        for j in range(num_yh):
            sum = sum + activation_hidden[j] * weight_out[j][k]

        # selecting activation functions between sigmoid and tanh
        activation_out[k] = sigmoid(sum, False)

#### Run backpropagation

In [11]:
for d in data:
    targets = d[1]
    
    # delta 
    output_deltas = [0.0] * num_yo    
    for k in range(num_yo):
        error = targets[k] - activation_out[k]
        # selecting activation functions between sigmoid and tanh, differentiation
        output_deltas[k] = sigmoid(activation_out[k], True) * error
        
        
    # Error function of hidden node
    hidden_deltas = [0.0] * num_yh
    for j in range(num_yh):
        error = 0.0
        for k in range(num_yo):
            error = error + output_deltas[k] * weight_out[j][k]
            # selecting activation functions between sigmoid and tanh, differentiation
        hidden_deltas[j] = sigmoid(activation_hidden[j], True) * error
        
        
    
    # Update output weights
    for j in range(num_yh):
        for k in range(num_yo):
            gradient = output_deltas[k] * activation_hidden[j]
            v = mo * gradient_out[j][k] - lr * gradient
            weight_out[j][k] += v
            gradient_out[j][k] = gradient
            
            
    # Input weight update
    for i in range(num_x):
        for j in range(num_yh):
            gradient = hidden_deltas[j] * activation_input[i]
            v = mo*gradient_in[i][j] - lr * gradient
            weight_in[i][j] += v
            gradient_in[i][j] = gradient
            
            
    # Calculation of error (least squares method)
    error = 0.0
    for k in range(len(targets)):
        error = error + 0.5 * (targets[k] - activation_out[k]) ** 2
        
    print(error)

0.40900896473032294
0.004565528433366953
0.004565528433366953
0.40900896473032294


In [None]:
# Run neural network
class NeuralNetwork:

    # Initialize models
    def __init__(self, num_x, num_yh, num_yo, bias=1):
        pass
       
    # forward propagation 
    def update(self, inputs):
        pass
    
    # running backpropagation
    def backPropagate(self, targets):
        pass
    
    # Doing training
    def train(self, patterns):
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.update(inputs)
                error = error + self.backPropagate(targets)
            if i % 500 == 0:
                print('error: %-.5f' % error)