In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
path = 'data/active/active1999-2001.cleaned.csv'
df = pd.read_csv(path)
df.head()

Unnamed: 0,site,age,edu,group,booster,sex,reason,ufov,hvltt,hvltt2,hvltt3,hvltt4,mmse,id
0,1,76,12,1,1,0,28,16,28,28,17,22,27,1
1,1,67,10,1,1,1,13,20,24,22,20,27,25,2
2,6,67,13,3,1,1,24,16,24,24,28,27,27,3
3,5,72,16,1,1,1,33,16,35,34,32,34,30,4
4,4,69,12,4,0,1,30,16,35,29,34,34,28,5


In [6]:
X = np.array([76, 12, 0, 67, 10, 1, 67, 13, 1, 72, 16, 1, 69, 12, 1]).reshape(5, 3)

W1 = np.array([0.0387, -0.9131, 0.0134, -0.4025, -0.8408, -0.7913, 0.0134, 0.0391, 0.3027, 0.7905, 0.7644, 0.1751]).reshape(3, 4)

b1 = np.array([0.4067, 0.5735, -0.3693, -0.0427])

print(X)
print(W1)
print(b1)
print(X @ W1)

H = np.maximum(0, X @ W1 + b1)
print(H)

W2 = np.array([-1.0246, -0.7404,  0.5110, -0.0512]).reshape(4, 1)
b2 = np.array([-0.0290])

Y_hat = H @ W2 + b2
print(Y_hat)

Y = [[22], [27], [27], [34], [34]]
print(np.mean(Y - Y_hat))


[[76 12  0]
 [67 10  1]
 [67 13  1]
 [72 16  1]
 [69 12  1]]
[[ 0.0387 -0.9131  0.0134 -0.4025]
 [-0.8408 -0.7913  0.0134  0.0391]
 [ 0.3027  0.7905  0.7644  0.1751]]
[ 0.4067  0.5735 -0.3693 -0.0427]
[[ -7.1484 -78.8912   1.1792 -30.1208]
 [ -5.5124 -68.3002   1.7962 -26.4014]
 [ -8.0348 -70.6741   1.8364 -26.2841]
 [-10.3637 -77.6135   1.9436 -28.1793]
 [ -7.1166 -71.709    1.8498 -27.1282]]
[[0.     0.     0.8099 0.    ]
 [0.     0.     1.4269 0.    ]
 [0.     0.     1.4671 0.    ]
 [0.     0.     1.5743 0.    ]
 [0.     0.     1.4805 0.    ]]
[[0.3848589]
 [0.7001459]
 [0.7206881]
 [0.7754673]
 [0.7275355]]
28.138260860000003


In [30]:
import numpy as np

np.set_printoptions(precision=4)

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

W1 = np.array([
    [0.0387, -0.9131, 0.0134, -0.4025],
    [-0.8408, -0.7913, 0.0134, 0.0391],
    [0.3027, 0.7905, 0.7644, 0.1751]
])
b1 = np.array([0.4067, 0.5735, -0.3693, -0.0427])
W2 = np.array([[-1.0246], [-0.7404], [0.5110], [-0.0512]])
b2 = np.array([-0.0290])

X = np.array([
    [-1.447581, -0.656491, 0.542079],
    [-0.547990, -0.273060, -1.844750],
    [1.790946, -0.656491, 0.542079],
    [-0.188154, 0.877234, -1.844750],
    [-1.267663, -0.273060, 0.542079]
])

Y_true = np.array([[34], [27], [27], [26], [27]])

print("\n---------Initial parameters---------")
print("\nX\n", X)
print("\nY_true\n", Y_true)
print("\nW1\n", W1)
print("\nb1\n", b1)
print("\nW2\n", W2)
print("\nb2\n", b2)

# Learning rate
lr = 0.0001

num_epochs = 3

for epoch in range(num_epochs):
    
    print("\n---------Epoch", epoch + 1, "---------")

    # Forward pass
    Z = np.dot(X, W1) + b1
    H = relu(Z)
    Y_pred = np.dot(H, W2) + b2
    
    print("\nForward pass:")
    print("\nZ\n", Z)
    print("\nH\n", H)
    print("\nY_pred\n", Y_pred)
    
    MSE = np.mean((Y_pred - Y_true) ** 2) / 5
    print(f"\nMSE: {MSE}")
    print(f"RMSE: {np.sqrt(MSE)}") 
    
    loss = np.mean((Y_pred - Y_true) ** 2) / 2
    print(f"\nLoss: {loss}")

    # Backward pass
    dY_pred = Y_pred - Y_true 
    dW2 = np.dot(H.T, dY_pred)
    db2 = np.sum(dY_pred, axis=0)
    dH = np.dot(dY_pred, W2.T)
    dZ = dH * relu_derivative(np.dot(X, W1) + b1)
    dW1 = np.dot(X.T, dH)
    db1 = np.sum(dH, axis=0)

    print("\nBackward pass:")
    print("\ndY_pred\n", dY_pred)
    print("\ndW2\n", dW2)
    print("\ndb2\n", db2)
    print("\ndH\n", dH)
    print("\ndZ\n", dZ)
    print("\ndW1\n", dW1)
    print("\ndb1\n", db1)

    # Update parameters
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    print("\nUpdated parameters:")
    print("\nW1\n", W1)
    print("\nb1\n", b1)
    print("\nW2\n", W2)
    print("\nb2\n", b2)


---------Initial parameters---------

X
 [[-1.4476 -0.6565  0.5421]
 [-0.548  -0.2731 -1.8447]
 [ 1.7909 -0.6565  0.5421]
 [-0.1882  0.8772 -1.8447]
 [-1.2677 -0.2731  0.5421]]

Y_true
 [[34]
 [27]
 [27]
 [26]
 [27]]

W1
 [[ 0.0387 -0.9131  0.0134 -0.4025]
 [-0.8408 -0.7913  0.0134  0.0391]
 [ 0.3027  0.7905  0.7644  0.1751]]

b1
 [ 0.4067  0.5735 -0.3693 -0.0427]

W2
 [[-1.0246]
 [-0.7404]
 [ 0.511 ]
 [-0.0512]]

b2
 [-0.029]

---------Epoch 1 ---------

Forward pass:

Z
 [[ 1.0667  2.8433  0.0169  0.6092]
 [ 0.0567 -0.1683 -1.7904 -0.1558]
 [ 1.1921 -0.1138  0.0603 -0.6943]
 [-0.8966 -1.4071 -1.7702 -0.2557]
 [ 0.7513  2.3756  0.0244  0.5518]]

H
 [[1.0667 2.8433 0.0169 0.6092]
 [0.0567 0.     0.     0.    ]
 [1.1921 0.     0.0603 0.    ]
 [0.     0.     0.     0.    ]
 [0.7513 2.3756 0.0244 0.5518]]

Y_pred
 [[-3.2497]
 [-0.0871]
 [-1.2196]
 [-0.029 ]
 [-2.5735]]

MSE: 178.78781474962278
RMSE: 13.371156073788937

Loss: 446.9695368740569

Backward pass:

dY_pred
 [[-37.2497]
 [-27.0