In [64]:
import numpy as np
import pandas as pd

In [65]:
# Load the regression data set from the CSV file next to the notebook.
# Later cells read columns x1, x2 (used as inputs) and t1, t2 (used as targets).
db = pd.read_csv('regr2.csv')
db

Unnamed: 0,x1,x2,t1,t2
0,1.0,0.5,10.0,5.0
1,2.5,1.3,20.5,10.2
2,3.2,2.1,30.1,15.3


In [66]:
# Split the frame into a feature matrix (x1, x2) and a target matrix (t1, t2),
# both as plain numpy arrays, then print them for a quick visual check.
X = db.loc[:, ['x1', 'x2']].to_numpy()
y = db.loc[:, ['t1', 't2']].to_numpy()
print(X)
print(y)

[[1.  0.5]
 [2.5 1.3]
 [3.2 2.1]]
[[10.   5. ]
 [20.5 10.2]
 [30.1 15.3]]


In [67]:
# Forward Pass #

def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``.

    Works on scalars and on numpy arrays of any shape (applied component-wise).
    """
    rectified = np.maximum(0, x)
    return rectified

def identity(x):
    """Identity activation: return the argument unchanged (same object, no copy)."""
    return x

def relu_deriv(x):
    """Element-wise derivative of ReLU: 1 where ``x > 0``, otherwise 0.

    The value at exactly 0 is taken to be 0. Works component-wise on numpy
    arrays; the result has the same shape as ``x``.
    """
    is_positive = x > 0
    # np.where(condition, value_if_true, value_if_false)
    return np.where(is_positive, 1, 0)
    
def forward(X,w1,w2,w3):
    """Run the forward pass of a network with two ReLU hidden layers.

    The bias of each layer is handled by appending a column of ones to the
    layer's input, so every weight matrix carries its bias weights in its
    last row.

    Parameters
    ----------
    X  : 2-D array of inputs, one row per sample.
    w1 : weights input -> hidden1, including the bias row.
    w2 : weights hidden1 -> hidden2, including the bias row.
    w3 : weights hidden2 -> output, including the bias row.

    Returns
    -------
    (o1, h1, h2, Xc, h1c, h2c) where ``o1`` is the (linear) network output,
    ``h1``/``h2`` are the hidden activations, and ``Xc``/``h1c``/``h2c`` are
    the input and hidden activations with the ones column appended.
    """
    def with_bias(layer_in):
        # Append a constant-1 column so the last weight row acts as the bias.
        ones_col = np.ones((layer_in.shape[0], 1))
        return np.concatenate([layer_in, ones_col], axis=1)

    Xc = with_bias(X)
    h1 = relu(Xc @ w1)

    h1c = with_bias(h1)
    h2 = relu(h1c @ w2)

    h2c = with_bias(h2)
    o1 = identity(h2c @ w3)

    return o1, h1, h2, Xc, h1c, h2c

In [68]:
# Backward Pass #

def backward(o,t,w2,w3,h2,h1,h1c,h2c,Xc):
    """Backpropagate the output error and return the weight gradients.

    Parameters
    ----------
    o   : output of the forward pass.
    t   : target values.
    w2  : weights hidden1 -> hidden2, INCLUDING the bias row (last row);
          the bias row is dropped here before propagating the error.
    w3  : weights hidden2 -> output, INCLUDING the bias row (last row);
          the bias row is dropped here before propagating the error.
    h2  : hidden2 activations (without the ones column).
    h1  : hidden1 activations (without the ones column).
    h1c : hidden1 activations with the ones column appended.
    h2c : hidden2 activations with the ones column appended.
    Xc  : inputs with the ones column appended.

    Returns
    -------
    (dW3, dW2, dW1): gradients with the same shapes as w3, w2, w1. They are
    summed over the samples (not averaged) and include the bias row, because
    they are built from the bias-augmented inputs h2c, h1c and Xc.
    """
    # The bias weights do not feed error back to the previous layer,
    # so strip the last (bias) row before back-propagating.
    w3_no_bias = w3[:-1]
    w2_no_bias = w2[:-1]

    # Output layer: linear activation, so the error signal is simply o - t
    # (this equals the gradient of 0.5 * sum((o - t) ** 2) with respect to o).
    delta_3 = o - t
    dW3 = h2c.T @ delta_3

    # Hidden layer 2: push the error back through w3 and gate it by ReLU'.
    delta_2 = relu_deriv(h2) * (delta_3 @ w3_no_bias.T)
    dW2 = h1c.T @ delta_2

    # Hidden layer 1: same step, one layer further back.
    delta_1 = relu_deriv(h1) * (delta_2 @ w2_no_bias.T)
    dW1 = Xc.T @ delta_1

    return dW3, dW2, dW1

In [69]:
#Stochastic Gradient Descent #

def sgd(W,dW,eta):
    """Apply one gradient-descent step to every weight matrix.

    Parameters
    ----------
    W   : sequence of weights.
    dW  : sequence of gradients, paired with ``W`` position by position.
    eta : learning rate.

    Returns
    -------
    A new list ``[W_i - eta * dW_i, ...]``; the inputs are not modified.
    Pairing is done with ``zip``, so extra items in the longer sequence are
    ignored.
    """
    return [weight - eta * grad for weight, grad in zip(W, dW)]

In [70]:
# Training loop: initialize the weights, then repeat forward pass, backward pass, and weight update.

def train(X,y,eta=0.01,epochs=1000,hidden=3,seed=None):
    """Train a network with two ReLU hidden layers by gradient descent.

    Every epoch runs one forward pass over all of ``X``, one backward pass,
    and one weight update. The mean squared error is printed every 100
    epochs; the printed value is computed from the outputs obtained before
    that epoch's update.

    Parameters
    ----------
    X      : 2-D array of inputs, shape (n_samples, n_features).
    y      : 2-D array of targets, shape (n_samples, n_outputs). The output
             layer width is taken from ``y`` so outputs and targets always
             line up instead of relying on broadcasting.
    eta    : learning rate passed to ``sgd``.
    epochs : number of update steps; 0 is allowed (no training).
    hidden : number of units in each of the two hidden layers.
    seed   : optional seed for the weight initialization. ``None`` (default)
             draws from numpy's global random state, as before.

    Returns
    -------
    (o, w1, w2, w3): the network outputs for ``X`` computed with the final
    weights, followed by the final weight matrices (each including its bias
    row).
    """
    # With no seed, keep using the global numpy state so existing callers that
    # seed it via np.random.seed(...) get the same draws as before.
    rng = np.random if seed is None else np.random.RandomState(seed)
    n_outputs = np.shape(y)[1]

    # Initialize weights; the "+ 1" row in each matrix holds the bias weights.
    w1 = rng.randn(X.shape[1] + 1, hidden)
    w2 = rng.randn(hidden + 1, hidden)
    w3 = rng.randn(hidden + 1, n_outputs)

    for epoch in range(epochs):
        o, h1, h2, Xc, h1c, h2c = forward(X, w1, w2, w3)
        dW3, dW2, dW1 = backward(o, y, w2, w3, h2, h1, h1c, h2c, Xc)
        w1, w2, w3 = sgd([w1, w2, w3], [dW1, dW2, dW3], eta)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}: Loss = {np.mean(np.square(o - y))}")

    # Recompute the outputs with the final weights: the `o` from the loop was
    # produced before the last update (and does not exist when epochs == 0).
    o = forward(X, w1, w2, w3)[0]
    return o, w1, w2, w3

# Train with eta=0.0001 for 10000 epochs and print only the first element of the
# returned tuple (the network outputs); the returned weights are discarded here.
print(train(X,y,0.0001,10000)[0])

Epoch 0: Loss = 360.69270673807233
Epoch 100: Loss = 290.6918476376073
Epoch 200: Loss = 258.44002502888276
Epoch 300: Loss = 202.4339198225507
Epoch 400: Loss = 122.84386228893118
Epoch 500: Loss = 61.597086596078206
Epoch 600: Loss = 32.74573332538816
Epoch 700: Loss = 16.56555871805452
Epoch 800: Loss = 7.282216297912227
Epoch 900: Loss = 2.8613201329161733
Epoch 1000: Loss = 1.0415552797295196
Epoch 1100: Loss = 0.3677953903351476
Epoch 1200: Loss = 0.1419359383621012
Epoch 1300: Loss = 0.06802079462899216
Epoch 1400: Loss = 0.04339864986281714
Epoch 1500: Loss = 0.033858936561193605
Epoch 1600: Loss = 0.02844876314360434
Epoch 1700: Loss = 0.025504709449908586
Epoch 1800: Loss = 0.023321203477082404
Epoch 1900: Loss = 0.021444250444790886
Epoch 2000: Loss = 0.019740677171160497
Epoch 2100: Loss = 0.018182126401242737
Epoch 2200: Loss = 0.01675188320980853
Epoch 2300: Loss = 0.015437400260410655
Epoch 2400: Loss = 0.014228299942701047
Epoch 2500: Loss = 0.01311556937328285
Epoch 26