# Backpropagation (Cross Entropy)

In [32]:
import copy
import math
import numpy as np
import matplotlib.pyplot as plt
import random

In [33]:
def transfer_function(x, x0):
    """Return the bipolar sigmoid activation ``tanh(x / (2 * x0))``.

    Args:
        x: Net input to the neuron.
        x0: Scale of the activation; larger values flatten the curve.

    Returns:
        A float in the closed interval [-1, 1].

    Raises:
        ZeroDivisionError: If ``x0`` is zero.
    """
    # The float literal forces true division: with ``2 * x0`` the expression
    # floor-divides on Python 2 whenever both arguments are integers.
    return math.tanh(x / (2.0 * x0))

def derivative_transfer_function(x, x0):
    """Return the slope of ``transfer_function`` at ``x`` for scale ``x0``.

    With ``f = transfer_function(x, x0)`` the value returned is
    ``0.5 * (1 + f) * (1 - f) / x0``, i.e. the derivative of
    ``tanh(x / (2 * x0))`` with respect to ``x`` expressed through the
    activation itself.
    """
    activation = transfer_function(x, x0)
    half_product = 0.5 * (1 + activation) * (1 - activation)
    return half_product / x0

In [34]:
# Network topology: 2 inputs -> 4 hidden units -> 1 output.
NumInputNeurons, NumHiddenNeurons, NumOutputNeurons = 2, 4, 1

# Training set in bipolar (+1 / -1) encoding. target_vector[i] is the desired
# output for input_neurons[i]: +1 when the two inputs differ, -1 when they
# match (XOR).
input_neurons = [
    [1, 1],
    [1, -1],
    [-1, 1],
    [-1, -1],
]
target_vector = [-1, 1, 1, -1]

# Grids of values swept as the learning_rate, C and x0 arguments of
# backpropogation().
learning_rates = [0.1, 0.2, 0.3]
Cs = [0.5, 1.0, 1.5]
x0s = [0.5, 1.0, 1.5]

# A run counts as converged once an epoch's summed squared error drops
# below this value.
tolerance = 0.05

In [35]:
# Fixed initial weights for a 4-hidden-unit network that were previously
# hard-wired into backpropogation() in place of random initialisation.
# Pass this tuple as ``initial_weights`` to replay that exact starting point.
# Layout: (weights_hidden[input][hidden], bias_hidden, weights_output,
# bias_output).
_REFERENCE_INITIAL_WEIGHTS = (
    [[0.1970, 0.3191, -0.1448, 0.3594],
     [0.3099, 0.1904, -0.0347, -0.4861]],
    [-0.3378, 0.2771, 0.2859, -0.3329],
    [0.4919, -0.2913, -0.3979, 0.3581],
    -0.1401,
)


def _random_initial_weights(C):
    """Draw every weight and bias independently from ``uniform(-C, C)``."""
    weights_hidden = []
    for _ in range(NumInputNeurons):
        weights_hidden.append(
            [random.uniform(-C, C) for _ in range(NumHiddenNeurons)])

    bias_hidden = []
    weights_output = []
    for _ in range(NumHiddenNeurons):
        bias_hidden.append(random.uniform(-C, C))
        weights_output.append(random.uniform(-C, C))

    bias_output = random.uniform(-C, C)
    return weights_hidden, bias_hidden, weights_output, bias_output


def _train_until_converged(learning_rate, x0, weights, max_epochs):
    """Run online backprop; return the 1-based converging epoch or None.

    ``weights`` is a ``(weights_hidden, bias_hidden, weights_output,
    bias_output)`` tuple; the contained lists are updated in place.
    """
    weights_hidden, bias_hidden, weights_output, bias_output = weights
    hidden_units = range(NumHiddenNeurons)
    input_units = range(NumInputNeurons)
    training_pairs = list(zip(input_neurons, target_vector))

    for epoch in range(max_epochs):
        squared_error = 0.0
        for pattern, target in training_pairs:
            # Forward pass: hidden layer, then the single output unit.
            z_in = []
            z = []
            for i in hidden_units:
                net = bias_hidden[i]
                for j in input_units:
                    net = net + weights_hidden[j][i] * pattern[j]
                z_in.append(net)
                z.append(transfer_function(net, x0))

            y_in = bias_output
            for i in hidden_units:
                y_in = y_in + weights_output[i] * z[i]
            y = transfer_function(y_in, x0)

            error = y - target
            squared_error = squared_error + error * error

            # Output delta. The original expression
            #     (-2 / (y + target)) * derivative_transfer_function(y_in, x0)
            # reduces, for targets of +1 / -1, to exactly (y - target) / x0.
            # Using the reduced form avoids a ZeroDivisionError when the
            # output saturates at the wrong sign (y == -target).
            delta_output = error / x0

            # Hidden deltas must use the output weights from *before* this
            # pattern's update, so compute them all first.
            delta_hidden = [
                delta_output * weights_output[i]
                * derivative_transfer_function(z_in[i], x0)
                for i in hidden_units
            ]

            # Gradient-descent step, applied immediately (online learning).
            for i in hidden_units:
                weights_output[i] = (
                    weights_output[i] - learning_rate * delta_output * z[i])
            bias_output = bias_output - learning_rate * delta_output

            for i in hidden_units:
                step = learning_rate * delta_hidden[i]
                for j in input_units:
                    weights_hidden[j][i] = (
                        weights_hidden[j][i] - step * pattern[j])
                bias_hidden[i] = bias_hidden[i] - step

        if squared_error < tolerance:
            return epoch + 1

    return None


def backpropogation(learning_rate, C, x0, num_trials=100, max_epochs=100000,
                    initial_weights=None):
    """Train the network repeatedly and print convergence statistics.

    Each trial starts from fresh weights and trains online (weights are
    updated after every pattern) on ``input_neurons`` / ``target_vector``
    until the summed squared error of one epoch falls below ``tolerance``
    or ``max_epochs`` epochs have elapsed.

    Reads the module-level names ``NumInputNeurons``, ``NumHiddenNeurons``,
    ``input_neurons``, ``target_vector`` and ``tolerance``.

    Args:
        learning_rate: Step size of the gradient-descent updates.
        C: Every initial weight and bias is drawn from ``uniform(-C, C)``.
            Ignored when ``initial_weights`` is given.
        x0: Scale passed to ``transfer_function`` and
            ``derivative_transfer_function``.
        num_trials: Number of independent training runs.
        max_epochs: Epoch limit per run before it is declared non-converged.
        initial_weights: Optional ``(weights_hidden, bias_hidden,
            weights_output, bias_output)`` tuple used as the starting point
            of every trial (deep-copied, so the argument is never mutated),
            e.g. ``_REFERENCE_INITIAL_WEIGHTS``. Useful for replaying one
            specific run.

    Prints:
        One line per trial (the number of epochs needed, or ``0`` if the run
        did not converge) followed by a four-line summary.
    """
    converge_epochs = []

    for _ in range(num_trials):
        if initial_weights is None:
            weights = _random_initial_weights(C)
        else:
            weights = copy.deepcopy(initial_weights)

        epochs_needed = _train_until_converged(
            learning_rate, x0, weights, max_epochs)
        if epochs_needed is None:
            print(0)
        else:
            converge_epochs.append(epochs_needed)
            print(epochs_needed)

    if converge_epochs:
        average_epochs = np.mean(converge_epochs)
        median_epochs = np.median(converge_epochs)
    else:
        # numpy would emit a RuntimeWarning on an empty list; report nan
        # directly instead.
        average_epochs = median_epochs = float("nan")

    print("Times converged: " + str(len(converge_epochs)))
    print("Times not converged: " + str(num_trials - len(converge_epochs)))
    print("Average epochs: " + str(average_epochs))
    print("Median epochs: " + str(median_epochs))

In [32]:
# Sweep the full learning_rate x C x x0 grid. Each combination prints a
# tab-separated header line, the output of backpropogation(), and a blank
# separator. Single-argument print(...) calls behave the same on Python 2
# and Python 3.
for learning_rate in learning_rates:
    for C in Cs:
        for x0 in x0s:
            print("\t".join(str(value) for value in (learning_rate, C, x0)))
            backpropogation(learning_rate, C, x0)
            print("\n")