In [1]:
import numpy as np

class Value:
    """Node of the computation graph: an array plus the function that yields it.

    ``func`` is the producing operation, or ``None`` for a node whose ``value``
    is assigned directly from outside.
    """

    def __init__(self, func=None):
        # Placeholder contents: an empty array until a value is stored.
        self.value = np.array([])
        self.func = func

    def compute(self):
        """Delegate the forward evaluation to the producing function, if any."""
        if not self.func:
            return
        self.func.compute()

    def diff(self, D):
        """Hand ``D`` to the producing function's ``diff``; no-op without one."""
        if not self.func:
            return
        self.func.diff(D)


class Func:
    """Base operation node: remembers its inputs and the Value it produces.

    Subclasses override ``compute`` and ``diff``; the versions here do nothing.
    """

    def __call__(self, inputs):
        """Attach ``inputs`` to this operation and return a new output Value."""
        self.inputs = inputs
        node = Value(self)
        self.output = node
        return node

    def compute(self):
        """No-op placeholder for the forward evaluation."""
        return None

    def diff(self, D):
        """No-op placeholder for the backward step; ``D`` is ignored here."""
        return None

In [2]:
class LeakyReLU(Func):
    """Element-wise leaky rectifier: ``x`` where ``x > 0``, ``slope * x`` elsewhere."""

    def __init__(self, slope):
        """Store ``slope``, the multiplier applied to non-positive inputs."""
        self.slope = slope

    def compute(self):
        """Recompute the input node, then store the activated values on the output."""
        x = self.inputs[0]
        x.compute()
        self.output.value = np.where(
            x.value > 0, x.value, x.value * self.slope
        )

    def diff(self, D):
        """Scale the incoming gradient ``D`` by the local derivative and pass it on.

        The derivative is 1 where the input was positive and ``slope`` elsewhere.
        It is transposed before the element-wise product, so ``D`` must broadcast
        against the transposed input shape (presumably a row vector against a
        column-shaped input; confirm against the caller).
        """
        x = self.inputs[0]
        # Build the mask from the sign of the INPUT, not the output: with a
        # negative slope a negative input yields a positive output, and an
        # output-based test would wrongly use derivative 1 there. For
        # slope >= 0 both tests select exactly the same entries.
        _D = np.where(x.value > 0, 1.0, self.slope).T
        x.diff(D * _D)

# Fixed seed so the initial weights and biases drawn by Linear are identical on
# every fresh run. Draws made through the stdlib `random` module are not
# affected by this seed.
WEIGHT_SEED = 0
rng = np.random.default_rng(WEIGHT_SEED)

class Linear(Func):
    """Affine layer ``W @ x + b`` whose parameters are updated in place by ``diff``."""

    def __init__(self, m, n):
        """Draw an ``(m, n)`` weight matrix and an ``(m, 1)`` bias from ``rng.random``."""
        weight_shape, bias_shape = (m, n), (m, 1)
        self.W = rng.random(weight_shape)
        self.b = rng.random(bias_shape)

    def compute(self):
        """Recompute the input node and store ``W @ input + b`` on the output node."""
        source = self.inputs[0]
        source.compute()
        affine = self.W @ source.value
        self.output.value = affine + self.b

    def diff(self, D):
        """Propagate ``D`` upstream, then subtract the parameter updates in place.

        ``D @ W`` is passed to the input node first, so the upstream gradient is
        formed from the weights as they were before this call modifies them.
        ``D`` is subtracted as-is (no extra scaling happens here).
        """
        source = self.inputs[0]
        source.diff(D @ self.W)
        # Flatten the stored input into a single row so that D.T @ row has the
        # same shape as W.
        row_input = source.value.reshape((1, source.value.size))
        self.W -= D.T @ row_input
        self.b -= D.T

In [None]:
from tqdm import tqdm
import random

# Input node of the network; f_approx overwrites X.value before each forward pass.
X = Value()

# Layers are created in forward order (2 -> 10 -> 50 -> 10 -> 1), so the shared
# generator is consumed in the same sequence as the wiring below.
L1, R1 = Linear(10, 2), LeakyReLU(0.01)
L2, R2 = Linear(50, 10), LeakyReLU(0.01)
L3, R3 = Linear(10, 50), LeakyReLU(0.01)
L4 = Linear(1, 10)

# Wire the layers one after another; each Func receives a one-element input list.
# Y ends up as the output node of the final linear layer.
Y = L1([X])
Y = R1([Y])
Y = L2([Y])
Y = R2([Y])
Y = L3([Y])
Y = R3([Y])
Y = L4([Y])


def f_target(x, y):
    """Return ``arcsin(x * y) + sin(0.2 ** x + 5 * y)``, the function to be learned."""
    product_term = np.arcsin(x * y)
    wave_term = np.sin(0.2**x + 5 * y)
    return product_term + wave_term


def f_approx(x, y):
    """Evaluate the network at the point ``(x, y)`` and return its single output.

    Writes the point into the global input node ``X`` as a 2x1 column,
    recomputes the global output node ``Y``, and returns the ``[0, 0]`` entry
    of its value.
    """
    point = np.array([x, y])
    X.value = point.reshape(2, 1)
    Y.compute()
    return Y.value[0, 0]


# 10000 training triples (x0, x1, f_target(x0, x1)) with both coordinates drawn
# uniformly from [-1, 1]; x0 is drawn before x1 for every triple.
samples = [
    (x0, x1, f_target(x0, x1))
    for x0, x1 in (
        (random.uniform(-1, 1), random.uniform(-1, 1)) for _ in range(10000)
    )
]


# Step size applied to every update, and the number of single-sample updates.
rate = 0.001
steps = 10000000

# One randomly chosen sample per step. The value passed to Y.diff is
# rate * sign(error); the layers subtract what they receive without further
# scaling, so the learning rate has to be folded in here.
for i in tqdm(range(steps)):
    x0, x1, y = random.choice(samples)
    d = f_approx(x0, x1) - y
    # np.sign(d) equals d / |d| for every non-zero error but is 0 when the
    # prediction hits the target exactly. The division would give 0/0 = NaN
    # there, and that NaN would be subtracted into every weight and bias.
    Y.diff(np.array([[np.sign(d) * rate]]))

 71%|████████████████████▌        | 7090450/10000000 [03:55<01:34, 30668.30it/s]

In [None]:
import matplotlib.pyplot as plt

def plot(f1, f2, lower, upper, levels):
    """Show filled contour plots of ``f1`` and ``f2`` side by side.

    Both functions are evaluated point by point (via ``np.vectorize``) on a
    256 x 256 grid spanning ``[lower, upper]`` along each axis. ``levels`` is
    the number of contour levels; they are spaced evenly over the value range
    of ``f1`` and shared by both panels, so the colours are comparable.
    """
    ticks = np.linspace(lower, upper, 256)
    grid_x, grid_y = np.meshgrid(ticks, ticks)
    # f1 is evaluated over the whole grid before f2.
    surfaces = [np.vectorize(fn)(grid_x, grid_y) for fn in (f1, f2)]
    reference = surfaces[0]
    shared_levels = np.linspace(reference.min(), reference.max(), levels)
    _, panels = plt.subplots(1, 2, figsize=(10, 5))
    for panel, surface in zip(panels, surfaces):
        panel.contourf(grid_x, grid_y, surface, levels=shared_levels, cmap="plasma")
    plt.show()

# Compare the target with the trained network on the same [-1, 1] square the
# training samples were drawn from, using 20 contour levels.
plot(f1=f_target, f2=f_approx, lower=-1, upper=1, levels=20)