In [66]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tqdm import tqdm

In [67]:
dataset = tf.keras.datasets.boston_housing
(X_train, Y_train), (X_test, Y_test) = dataset.load_data()

X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

((404, 13), (404,), (102, 13), (102,))

In [69]:
def relu(X):
    return np.maximum(X, 0)

def linear(X):
    return X

def root_mean_squired_error(Y_pred, Y_gt):
    return np.sqrt(np.mean((Y_pred - Y_gt) ** 2))

In [94]:
D_in = X_train.shape[1]
H1 = 128
H2 = 32
D_out = 1

η = 0.001
epochs = 1

In [95]:
W1, W2, W3 = np.random.randn(D_in, H1), np.random.randn(H1, H2), np.random.randn(H2, D_out)
B1, B2, B3 = np.random.randn(1, H1), np.random.randn(1, H2), np.random.randn(1, D_out)

In [97]:
for epoch in range(epochs):

    # train
    
    Y_pred = []
    for x, y in tqdm(zip(X_train, Y_train)):

        # forward
        x = x.reshape(-1, 1)

        # layer 1
        net1 = x.T @ W1 + B1
        out1 = relu(net1)
        print("out1", out1)

        # layer 2
        net2 = out1 @ W2 + B2
        out2 = relu(net2)

        # layer 3
        net3 = out2 @ W3 + B3
        out3 = linear(net3)

        y_pred = out3
        Y_pred.append(y_pred.T)

        # back propagation

        # layer 3
        print(y_pred, y)
        error = -2 * (y - y_pred)
        grad_W3 = out2.T @ error
        grad_B3 = error

        # layer 2
        error = error @ W3.T * 1. #* (out2 > 0)
        grad_W2 = out1.T @ error
        grad_B2 = error

        # layer 1
        error = error @ W2.T * 1.# * (out1 > 0)
        grad_W1 = x @ error
        grad_B1 = error

        # update

        # layer 1
        W1 -= η * grad_W1
        B1 -= η * grad_B1
        
        # layer 2
        W2 -= η * grad_W2
        B2 -= η * grad_B2

        # layer 3
        W3 -= η * grad_W3
        B3 -= η * grad_B3

    Y_pred = np.array(Y_pred)
    loss_train = root_mean_squired_error(Y_pred, Y_train)
    
    # test

    Y_pred = []
    for x, y in zip(X_test, Y_test):

        # forward
        x = x.reshape(-1, 1)

        # layer 1
        net1 = x.T @ W1 + B1
        out1 = relu(net1)

        # layer 2
        net2 = out1 @ W2 + B2
        out2 = relu(net2)

        # layer 3
        net3 = out2 @ W3 + B3
        out3 = linear(net3)

        y_pred = out3
        Y_pred.append(y_pred.T)

    Y_pred = np.array(Y_pred)
    loss_test = root_mean_squired_error(Y_pred, Y_test)

    print('loss train:', loss_train)
    print('loss test:', loss_test)

print('train completed!')

404it [00:00, 7076.66it/s]

[[nan]] 15.2
[[nan]] 42.3
[[nan]] 50.0
[[nan]] 21.1
[[nan]] 17.7
[[nan]] 18.5
[[nan]] 11.3
[[nan]] 15.6
[[nan]] 15.6
[[nan]] 14.4
[[nan]] 12.1
[[nan]] 17.9
[[nan]] 23.1
[[nan]] 19.9
[[nan]] 15.7
[[nan]] 8.8
[[nan]] 50.0
[[nan]] 22.5
[[nan]] 24.1
[[nan]] 27.5
[[nan]] 10.9
[[nan]] 30.8
[[nan]] 32.9
[[nan]] 24.0
[[nan]] 18.5
[[nan]] 13.3
[[nan]] 22.9
[[nan]] 34.7
[[nan]] 16.6
[[nan]] 17.5
[[nan]] 22.3
[[nan]] 16.1
[[nan]] 14.9
[[nan]] 23.1
[[nan]] 34.9
[[nan]] 25.0
[[nan]] 13.9
[[nan]] 13.1
[[nan]] 20.4
[[nan]] 20.0
[[nan]] 15.2
[[nan]] 24.7
[[nan]] 22.2
[[nan]] 16.7
[[nan]] 12.7
[[nan]] 15.6
[[nan]] 18.4
[[nan]] 21.0
[[nan]] 30.1
[[nan]] 15.1
[[nan]] 18.7
[[nan]] 9.6
[[nan]] 31.5
[[nan]] 24.8
[[nan]] 19.1
[[nan]] 22.0
[[nan]] 14.5
[[nan]] 11.0
[[nan]] 32.0
[[nan]] 29.4
[[nan]] 20.3
[[nan]] 24.4
[[nan]] 14.6
[[nan]] 19.5
[[nan]] 14.1
[[nan]] 14.3
[[nan]] 15.6
[[nan]] 10.5
[[nan]] 6.3
[[nan]] 19.3
[[nan]] 19.3
[[nan]] 13.4
[[nan]] 36.4
[[nan]] 17.8
[[nan]] 13.5
[[nan]] 16.5
[[nan]] 8.3
[[n


