In [1]:
import numpy as np


class ActivationManager:
    """Interface shared by the activation strategies in this notebook.

    Every method here is a no-op that returns ``None``; subclasses are
    expected to override them with real behaviour.
    """

    def activate(self, z):
        """Placeholder for the activation applied to ``z``; returns ``None``."""
        return None

    def derivate(self, a):
        """Placeholder for the activation's derivative; returns ``None``."""
        return None

    def cost(self, predictions, truth, w=None, lambd=None):
        """Placeholder for the cost of ``predictions`` vs ``truth``; returns ``None``."""
        return None


class SigmoidActivator(ActivationManager):
    """Logistic (sigmoid) activation paired with a binary cross-entropy cost."""

    def activate(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``."""
        return 1 / (1 + np.exp(-z))

    def derivate(self, a):
        """Return ``a * (1 - a)``.

        This is the sigmoid's slope when ``a`` is already the output of
        :meth:`activate` (not the raw pre-activation).
        """
        return a * (1 - a)

    def cost(self, predictions, truth, w=None, lambd=None):
        """Return the mean binary cross-entropy, optionally regularised.

        Args:
            predictions: Predicted probabilities, expected to lie in [0, 1].
            truth: Target values, broadcastable against ``predictions``.
            w: Optional weights to penalise (passed to ``lambda_sum``).
            lambd: Optional regularisation strength (passed to ``lambda_sum``).

        Returns:
            ``-mean(truth * log(p) + (1 - truth) * log(1 - p))``, plus
            ``lambda_sum(w, lambd)`` when both ``w`` and ``lambd`` are given.
        """
        # Keep probabilities strictly inside (0, 1): log(0) would otherwise
        # turn the whole mean into nan/inf for a saturated prediction.
        eps = np.finfo(float).eps
        p = np.clip(np.asarray(predictions, dtype=float), eps, 1 - eps)
        truth = np.asarray(truth)

        positive_term = truth * np.log(p)
        negative_term = (1 - truth) * np.log(1 - p)
        result = -np.mean(positive_term + negative_term)

        # Compare against None explicitly: ``if w`` raises ValueError for any
        # ndarray with more than one element.
        if w is not None and lambd is not None:
            result += lambda_sum(w, lambd)
        return result


class LinearActivator(ActivationManager):
    """Identity activation paired with a halved mean-squared-error cost."""

    def activate(self, z):
        """Return ``z`` unchanged."""
        return z

    def cost(self, predictions, truth, w=None, lambd=None):
        """Return half the mean squared error, optionally regularised.

        Args:
            predictions: Predicted values.
            truth: Target values, broadcastable against ``predictions``.
            w: Optional weights to penalise (passed to ``lambda_sum``).
            lambd: Optional regularisation strength (passed to ``lambda_sum``).

        Returns:
            ``mean((predictions - truth) ** 2) / 2``, plus
            ``lambda_sum(w, lambd)`` when both ``w`` and ``lambd`` are given.
        """
        residual = np.asarray(predictions) - np.asarray(truth)
        result = np.mean(residual ** 2) / 2

        # Compare against None explicitly: ``if w`` raises ValueError for any
        # ndarray with more than one element.
        if w is not None and lambd is not None:
            result += lambda_sum(w, lambd)
        return result

    def derivate(self, a):
        """Return the constant slope of the identity function, ``1``."""
        return 1


# Toy data set: every combination of three binary inputs, in counting order,
# followed by a target column that simply repeats the first input.
training_data = np.array([
    [first, second, third, first]
    for first in (0, 1)
    for second in (0, 1)
    for third in (0, 1)
])
x_train = training_data[:, :3]  # the three input columns
y_train = training_data[:, 3:]  # target column, kept two-dimensional

# Seed the global RNG so the initial weights are identical on every run.
np.random.seed(1)

# Initial weights drawn uniformly from [-1, 1).  The draw order (w1, then w2)
# matters for reproducibility.
w1 = np.random.random((3, 5)) * 2 - 1
w2 = np.random.random((5, 1)) * 2 - 1


def lambda_sum(w, lambd):
    """Return the regularisation term: half the mean of ``lambd * w ** 2``."""
    scaled_squares = lambd * w ** 2
    return np.mean(scaled_squares) / 2
def cost(self, predictions, truth, w=None, lambd=None):
    """Return the mean binary cross-entropy, optionally regularised.

    Module-level counterpart of ``SigmoidActivator.cost``.  ``self`` is
    accepted only to keep the existing signature and is never used.

    Args:
        self: Ignored.
        predictions: Predicted probabilities, expected to lie in [0, 1].
        truth: Target values, broadcastable against ``predictions``.
        w: Optional weights to penalise (passed to ``lambda_sum``).
        lambd: Optional regularisation strength (passed to ``lambda_sum``).

    Returns:
        ``-mean(truth * log(p) + (1 - truth) * log(1 - p))``, plus
        ``lambda_sum(w, lambd)`` when both ``w`` and ``lambd`` are given.
    """
    # Keep probabilities strictly inside (0, 1): log(0) would otherwise turn
    # the whole mean into nan/inf for a saturated prediction.
    eps = np.finfo(float).eps
    p = np.clip(np.asarray(predictions, dtype=float), eps, 1 - eps)
    truth = np.asarray(truth)

    positive_term = truth * np.log(p)
    negative_term = (1 - truth) * np.log(1 - p)
    result = -np.mean(positive_term + negative_term)

    # Compare against None explicitly: ``if w`` raises ValueError for any
    # ndarray with more than one element.
    if w is not None and lambd is not None:
        result += lambda_sum(w, lambd)
    return result



In [2]:
# Train the two weight matrices with full-batch gradient descent.
# NOTE: w1 and w2 are updated in place, so re-running this cell continues
# training from the current weights instead of starting over.
activator = SigmoidActivator()
costs = []  # cost of the network output, recorded once per iteration
for i in range(10000):
    # Forward pass through both layers.
    z1 = np.dot(x_train, w1)
    a1 = activator.activate(z1)
    z2 = np.dot(a1, w2)
    a2 = activator.activate(z2)
    costs.append(activator.cost(a2, y_train))

    # Backward pass.  e2 is prediction minus target, so the gradients below
    # point uphill and have to be subtracted from the weights.
    e2 = a2 - y_train
    g2 = e2 * activator.derivate(a2)
    e1 = np.dot(g2, w2.T)  # uses w2 before it is updated
    g1 = e1 * activator.derivate(a1)

    gw2 = np.dot(a1.T, g2)
    gw1 = np.dot(x_train.T, g1)

    # Gradient-descent step with an implicit learning rate of 1.  Both layers
    # must move in the same (downhill) direction.
    w2 -= gw2
    w1 -= gw1
