In [83]:
import numpy as np
import plotly.graph_objects as go

# Training inputs: 1000 evenly spaced points on [-4*pi, 4*pi], stored as a
# (1000, 1) column so they can be fed straight into matrix products.
x = np.linspace(-4 * np.pi, 4 * np.pi, num=1000)[:, np.newaxis]
# Regression target: the sine of every input point (same shape as x).
y_true = np.sin(x)

In [74]:
from time import time

# Fixed seed so that Restart & Run All reproduces the same initial weights
# (and therefore the same training curve and final loss) on every run.
SEED = 42
np.random.seed(SEED)

# Network layout: 1 input -> 10 hidden -> 10 hidden -> 1 output.
# Each weight matrix is drawn from a standard normal and scaled by
# sqrt(1 / fan_in); every bias starts at zero.
W1 = np.random.randn(1, 10) * np.sqrt(1 / 1)
b1 = np.zeros((1, 10))
W2 = np.random.randn(10, 10) * np.sqrt(1 / 10)
b2 = np.zeros((1, 10))
W3 = np.random.randn(10, 1) * np.sqrt(1 / 10)
b3 = np.zeros((1, 1))

In [71]:
def tanh(z):
    """Hyperbolic-tangent activation, applied element-wise via ``np.tanh``."""
    activated = np.tanh(z)
    return activated


def tanh_d(z):
    """Derivative of tanh with respect to its input: ``1 - tanh(z)**2``."""
    t = np.tanh(z)
    return 1 - t ** 2


# Hyperparameters for the hand-written training loop.
lr = 0.05  # gradient-descent step size
epochs = 5000  # number of full-batch updates

In [75]:
# Full-batch gradient descent on the mean-squared error of the hand-written
# network (two tanh hidden layers followed by a linear output layer).
for _ in range(epochs):
    # Forward pass.
    z1 = x @ W1 + b1
    a1 = tanh(z1)
    z2 = a1 @ W2 + b2
    a2 = tanh(z2)
    y_pred = a2 @ W3 + b3

    residual = y_pred - y_true
    loss = np.mean(residual ** 2)

    # Backward pass: gradient of the mean-squared error w.r.t. y_pred.
    dL = 2 * residual / len(x)

    # Output layer.
    dW3, db3 = a2.T @ dL, dL.sum(axis=0, keepdims=True)

    # Second hidden layer.
    da2 = dL @ W3.T
    dz2 = da2 * tanh_d(z2)
    dW2, db2 = a1.T @ dz2, dz2.sum(axis=0, keepdims=True)

    # First hidden layer.
    da1 = dz2 @ W2.T
    dz1 = da1 * tanh_d(z1)
    dW1, db1 = x.T @ dz1, dz1.sum(axis=0, keepdims=True)

    # In-place update of every parameter; all gradients were computed above,
    # so the order of the updates does not matter.
    for param, grad in (
        (W3, dW3),
        (b3, db3),
        (W2, dW2),
        (b2, db2),
        (W1, dW1),
        (b1, db1),
    ):
        param -= lr * grad

# Loss measured on the forward pass of the final epoch.
print(f"Loss: {loss}")

Loss: 0.40394014381961


In [56]:
import plotly.io as pio

# Open Plotly figures in the system web browser instead of inline.
pio.renderers.default = "browser"

# Overlay the target sine curve and the network's prediction on one figure.
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=x.flatten(), y=y_true.flatten(), mode="lines", name="True Curve")
)
fig.add_trace(
    go.Scatter(x=x.flatten(), y=y_pred.flatten(), mode="lines", name="Predicted Curve")
)
# The hand-written network uses tanh activations, so the title says tanh.
fig.update_layout(
    title="Curve Prediction (3-layer NN with tanh)", xaxis_title="x", yaxis_title="y"
)
fig.show()

In [84]:
def n_layer_nn(x, y, N, lr=0.01, epochs=5000, hidden_size=10):
    """Train a fully connected tanh network with full-batch gradient descent.

    The layer widths are ``[d_in] + [hidden_size] * (N - 2) + [d_out]``, so
    ``N`` counts the input layer as well: the network has ``N - 1`` weight
    matrices, i.e. ``N - 2`` tanh hidden layers followed by one linear output
    layer. Values of ``N`` below 2 produce the same single linear layer as
    ``N = 2``.

    Weights are drawn from NumPy's global random state and scaled by
    ``sqrt(1 / fan_in)``; biases start at zero. The loss is the mean-squared
    error between the prediction and ``y``.

    Args:
        x: 2-D array of inputs, shape ``(m, d_in)``.
        y: 2-D array of targets, shape ``(m, d_out)``.
        N: number of layers, counting input and output.
        lr: gradient-descent step size.
        epochs: number of full-batch updates; ``0`` returns the untrained
            network instead of failing.
        hidden_size: width of every hidden layer.

    Returns:
        Tuple ``(W, b, y_pred, loss)``: the lists of weight matrices and bias
        rows, and the prediction and mean-squared error evaluated with those
        returned parameters.
    """
    m, d_in = x.shape
    d_out = y.shape[1]

    sizes = [d_in] + [hidden_size] * (N - 2) + [d_out]
    n_layers = len(sizes) - 1
    W = [
        np.random.randn(sizes[i], sizes[i + 1]) * np.sqrt(1 / sizes[i])
        for i in range(n_layers)
    ]
    b = [np.zeros((1, sizes[i + 1])) for i in range(n_layers)]

    def _forward():
        # Returns the input to every layer (A[0] is x) and the linear output.
        A = [x]
        for i in range(n_layers - 1):
            A.append(np.tanh(A[-1] @ W[i] + b[i]))
        return A, A[-1] @ W[-1] + b[-1]

    for _ in range(epochs):
        A, y_pred = _forward()

        # Gradient of the mean-squared error w.r.t. the output pre-activation.
        dZ = 2 * (y_pred - y) / m
        dW, db = [None] * n_layers, [None] * n_layers

        for i in reversed(range(n_layers)):
            dW[i] = A[i].T @ dZ
            db[i] = np.sum(dZ, axis=0, keepdims=True)
            if i > 0:
                # A[i] is tanh of layer i-1's pre-activation, so the tanh
                # derivative there is 1 - A[i]**2 (no need to recompute tanh).
                dZ = (dZ @ W[i].T) * (1 - A[i] ** 2)

        for i in range(n_layers):
            W[i] -= lr * dW[i]
            b[i] -= lr * db[i]

    # Final forward pass so the returned prediction and loss match the
    # returned parameters (and exist even when epochs == 0).
    _, y_pred = _forward()
    loss = np.mean((y_pred - y) ** 2)

    return W, b, y_pred, loss

In [85]:
# Train the generic network with N=4 and report the loss it returns.
res = n_layer_nn(x, y_true, 4)
trained_W, trained_b, y_pred, final_loss = res
print(final_loss)

# Open the figure in the system web browser and overlay target vs. prediction.
pio.renderers.default = "browser"
fig = go.Figure()
for label, series in (("True Curve", y_true), ("Predicted Curve", y_pred)):
    fig.add_trace(
        go.Scatter(x=x.flatten(), y=series.flatten(), mode="lines", name=label)
    )
fig.update_layout(
    title="Curve Prediction (3-layer NN with tanh)",
    xaxis_title="x",
    yaxis_title="y",
)
fig.show()

0.37314682606382893
