<a href="https://colab.research.google.com/github/InowaR/colab/blob/main/OOP_XOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [247]:
import numpy as np


def mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    The element-wise difference is squared and averaged over all elements,
    yielding a single scalar.
    """
    residual = y_true - y_pred
    return np.mean(np.square(residual))

def mse_prime(y_true, y_pred):
    """Return the gradient of the mean squared error w.r.t. ``y_pred``.

    The result has the shape of ``y_pred - y_true``; each entry is
    ``2 * (y_pred - y_true)`` divided by the number of elements in ``y_true``.
    """
    residual = y_pred - y_true
    element_count = np.size(y_true)
    return 2 * residual / element_count


def predict(network, input):
    """Run ``input`` through every layer of ``network`` and return the result.

    Layers are applied in list order; each layer's ``forward`` output becomes
    the next layer's input. An empty network returns ``input`` unchanged.
    """
    signal = input
    for stage in network:
        signal = stage.forward(signal)
    return signal

def train(network, loss, loss_prime, x_train, y_train, epochs=1000, learning_rate=0.1, verbose=False):
    """Fit ``network`` in place with per-sample gradient descent.

    Every epoch walks the pairs produced by ``zip(x_train, y_train)`` in
    order. For each pair the network output is computed, the loss is added to
    the epoch total, and the loss gradient is pushed backwards through the
    layers, each of which updates itself via ``backward``.

    Args:
        network: Sequence of layer objects exposing ``forward(input)`` and
            ``backward(output_gradient, learning_rate)``.
        loss: Callable ``loss(y_true, y_pred)`` returning the sample loss.
        loss_prime: Callable ``loss_prime(y_true, y_pred)`` returning the
            gradient of the loss with respect to the prediction.
        x_train: Training inputs, one entry per sample.
        y_train: Training targets, paired with ``x_train`` by position.
        epochs: Number of passes over the training pairs.
        learning_rate: Step size handed to every layer's ``backward``.
        verbose: When true, print the mean error after each epoch.

    Returns:
        The mean per-sample loss of the last epoch (the summed loss divided
        by ``len(x_train)``). Each sample's loss is measured before the
        update triggered by that sample. When ``epochs`` is not positive no
        training happens and NaN is returned; NaN compares false against any
        number, so "keep the smallest error" callers never select it.
    """
    # Bound before the loop so that ``epochs <= 0`` returns a value instead of
    # raising UnboundLocalError at the final ``return``.
    error = float("nan")
    for e in range(epochs):
        error = 0
        for x, y in zip(x_train, y_train):
            output = predict(network, x)
            error += loss(y, output)
            grad = loss_prime(y, output)
            # Back-propagation: each layer receives the gradient w.r.t. its
            # output and returns the gradient w.r.t. its input.
            for layer in reversed(network):
                grad = layer.backward(grad, learning_rate)
        error /= len(x_train)
        if verbose:
            print(f"{e + 1}/{epochs}, error={error}")
    return error


class Dense:
    """Fully connected layer: ``weights . input + bias``.

    ``weights`` has shape ``(output_size, input_size)`` and ``bias`` has shape
    ``(output_size, 1)``; both start as standard-normal random draws.
    """

    def __init__(self, input_size, output_size):
        # Weights are drawn before the bias; the order matters for runs that
        # seed NumPy's global random state.
        weight_shape = (output_size, input_size)
        self.weights = np.random.randn(*weight_shape)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Cache ``input`` for the backward pass and return the layer output."""
        self.input = input
        weighted_sum = np.dot(self.weights, input)
        return weighted_sum + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        ``output_gradient`` is the loss gradient w.r.t. this layer's output.
        The returned gradient w.r.t. the layer input is computed with the
        weights as they were before this update.
        """
        input_gradient = np.dot(self.weights.T, output_gradient)
        weights_gradient = np.dot(output_gradient, self.input.T)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

class Activation:
    """Parameter-free layer that applies a function element-wise.

    ``activation`` is used in the forward pass and ``activation_prime`` (its
    derivative) in the backward pass.
    """

    def __init__(self, activation, activation_prime):
        self.activation, self.activation_prime = activation, activation_prime

    def forward(self, input):
        """Cache ``input`` for the backward pass and return ``activation(input)``."""
        self.input = input
        return self.activation(input)

    def backward(self, output_gradient, learning_rate):
        """Return the gradient w.r.t. the cached input.

        ``learning_rate`` is accepted so the signature matches other layers;
        it is not used because this layer has nothing to update.
        """
        local_slope = self.activation_prime(self.input)
        return np.multiply(output_gradient, local_slope)


class Tanh(Activation):
    """Hyperbolic-tangent activation layer."""

    name = 'Tanh'

    def __init__(self):
        # d/dx tanh(x) = 1 - tanh(x)^2
        super().__init__(np.tanh, lambda z: 1 - np.tanh(z) ** 2)

class Sigmoid(Activation):
    """Logistic-sigmoid activation layer."""

    name = 'Sigmoid'

    def __init__(self):
        def logistic(z):
            return 1 / (1 + np.exp(-z))

        def logistic_slope(z):
            # d/dx s(x) = s(x) * (1 - s(x))
            value = logistic(z)
            return value * (1 - value)

        super().__init__(logistic, logistic_slope)


class ReLU(Activation):
    """Rectified-linear activation layer."""

    name = 'ReLU'

    def __init__(self):
        def rectify(z):
            return np.maximum(0, z)

        def rectify_slope(z):
            # Slope is 1 where the input is strictly positive and 0 elsewhere.
            positive = z > 0
            return positive.astype(np.float32)

        super().__init__(rectify, rectify_slope)



# XOR truth table: four two-bit inputs and their one-bit targets.
x = [[0, 0], [0, 1], [1, 0], [1, 1]]
y = [[0], [1], [1], [0]]

# Give every sample a trailing axis of length 1 so it is a column vector:
# X has shape (4, 2, 1) and Y has shape (4, 1, 1).
X = np.asarray(x).reshape(len(x), len(x[0]), 1)
Y = np.asarray(y).reshape(len(y), len(y[0]), 1)

In [253]:
import itertools

def get_activation_combinations(objects):
    """Return the activation configurations to try.

    The result lists every ordering of ``objects`` (tuples from
    ``itertools.permutations``) followed by one uniform configuration per
    object (a list of three entries).

    Args:
        objects: Activation layer instances to combine.

    Returns:
        A list of configurations. In the uniform configurations each of the
        three entries is a separate shallow copy of the object, never the
        same instance repeated.
    """
    import copy  # local import: the surrounding code only imports itertools

    configurations = list(itertools.permutations(objects))
    for element in objects:
        # A layer caches its forward input on the instance (``self.input``).
        # If one instance filled all three slots of a network, every slot
        # would see the input cached by the last slot during backpropagation
        # and produce wrong gradients, so each slot gets its own copy.
        configurations.append([copy.copy(element) for _ in range(3)])
    return configurations

# One instance of each activation layer class; this list is passed to
# get_activation_combinations to build the candidate configurations.
activation_functions = [ReLU(), Tanh(), Sigmoid()]

def evaluate_network(network, X, Y):
    """Train ``network`` on ``(X, Y)`` with MSE and return the final error.

    Uses ``train``'s default number of epochs and learning rate; the network
    is modified in place.
    """
    return train(network, mse, mse_prime, X, Y)


# Candidate activation assignments for the three activation slots.
function_configs = get_activation_combinations(activation_functions)

# Running record of the configuration with the lowest training error so far.
best_network = None
best_error = np.inf
best_first = best_second = best_third = None

for f in function_configs:
    # 2 -> 3 -> 4 -> 1 network; the activation after each dense layer comes
    # from the candidate configuration.
    network = [Dense(2, 3), f[0], Dense(3, 4), f[1], Dense(4, 1), f[2]]

    error = evaluate_network(network, X, Y)

    if error < best_error:
        best_network, best_error = network, error
        # Slots 1, 3 and 5 of the network hold f[0], f[1] and f[2].
        best_first, best_second, best_third = network[1], network[3], network[5]

print("Best function configuration:", best_first.name, best_second.name, best_third.name)
print("Best training error:", best_error)

Best function configuration: Tanh Sigmoid ReLU
Best training error: 2.2340787354891936e-32


In [254]:
def evaluate_network(network, X, Y):
    """Train ``network`` on ``(X, Y)`` with MSE and return the final error.

    Uses ``train``'s default number of epochs and learning rate; the network
    is modified in place.
    """
    return train(network, mse, mse_prime, X, Y)


# Hidden-layer widths to compare: (first hidden size, second hidden size).
neuron_configs = [(2, 3), (3, 4), (4, 5), (5, 6), (7, 8), (9, 10)]

# Running record of the layout with the lowest training error so far.
best_network = None
best_error = np.inf
best_neurons1 = best_neurons2 = None

for neurons1, neurons2 in neuron_configs:
    # Same three-dense-layer layout as the activation search, reusing the
    # activation layers selected there.
    network = [
        Dense(2, neurons1), best_first,
        Dense(neurons1, neurons2), best_second,
        Dense(neurons2, 1), best_third,
    ]

    error = evaluate_network(network, X, Y)

    if error < best_error:
        best_network, best_error = network, error
        best_neurons1, best_neurons2 = neurons1, neurons2

print("Best neuron configuration:", best_neurons1, best_neurons2)
print("Best training error:", best_error)

Best neuron configuration: 4 5
Best training error: 5.789826895314128e-33


In [255]:
def evaluate_network(network, X, Y, lr):
    """Train ``network`` on ``(X, Y)`` with MSE at learning rate ``lr``.

    Uses ``train``'s default number of epochs, modifies the network in place
    and returns the final training error.
    """
    return train(network, mse, mse_prime, X, Y, learning_rate=lr)


# Step sizes to compare, using the layer sizes and activations chosen earlier.
learning_rates = [0.1, 0.01, 0.001]

best_lr = None
best_error = np.inf

# The layer sizes do not depend on the learning rate, so bind them once.
neurons1, neurons2 = best_neurons1, best_neurons2

for lr in learning_rates:
    # A fresh set of dense layers is created for every learning rate.
    network = [
        Dense(2, neurons1), best_first,
        Dense(neurons1, neurons2), best_second,
        Dense(neurons2, 1), best_third,
    ]
    error = evaluate_network(network, X, Y, lr)
    if error < best_error:
        best_lr, best_error = lr, error

print("Best learning rate:", best_lr)
print("Best training error:", best_error)


Best learning rate: 0.001
Best training error: 0.2295512519238576


In [256]:
# Final model: the selected layer sizes and learning rate, trained for longer.
# NOTE(review): this uses Tanh in every slot rather than best_first /
# best_second / best_third from the activation search - confirm that is intended.
neurons1, neurons2 = best_neurons1, best_neurons2

network = [
    Dense(2, neurons1), Tanh(),
    Dense(neurons1, neurons2), Tanh(),
    Dense(neurons2, 1), Tanh(),
]

train(network, mse, mse_prime, X, Y, epochs=10000, learning_rate=best_lr, verbose=False)

# Print the prediction for each of the first four samples in X.
for index in range(4):
    print(predict(network, X[index]))

[[0.00290109]]
[[0.96515455]]
[[0.96572018]]
[[0.00206009]]
