In [26]:
import numpy as np

In [None]:
def relu(x):
    """Rectified linear unit, applied element-wise.

    Negative entries are replaced by 0; non-negative entries are returned
    unchanged.
    """
    floor = 0
    return np.maximum(x, floor)

def relu_derivative(x):
    """Element-wise slope of ReLU: 1 where ``x > 0``, otherwise 0.

    The slope at exactly ``x == 0`` is reported as 0.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    Args:
        x: Scalar or NumPy array of real numbers.

    Returns:
        The logistic value(s), in the closed interval [0, 1], with the same
        shape as ``x``.
    """
    # For strongly negative x, exp(-x) overflows to inf. The quotient
    # 1 / (1 + inf) is 0.0, which is the correct limit, so the overflow is
    # harmless. Suppress only that floating-point flag, and only here, so
    # saturated inputs neither emit a RuntimeWarning nor raise when the caller
    # has configured NumPy to treat overflow as an error.
    with np.errstate(over='ignore'):
        return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, element-wise.

    This is the sigmoid's derivative expressed in terms of the sigmoid's
    *output*: pass the already-activated value ``sigmoid(z)``, not the raw
    pre-activation ``z``.
    """
    complement = 1 - x
    return x * complement

In [28]:
# Every combination of two binary inputs, one sample per row.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Target for each row of X as a column vector: 1 when exactly one of the two
# inputs is 1 (XOR), otherwise 0.
y = np.array([
    [0],
    [1],
    [1],
    [0],
])

In [29]:
# Training hyperparameters.
learning_rate = 0.1
epochs = 10000

# Network shape: 2 inputs -> 4 hidden units -> 1 output.
input_layer_size = 2
hidden_layer_size = 4
output_layer_size = 1

# Seed the global NumPy generator before any parameter is drawn so the
# initial weights are reproducible.
np.random.seed(42)

# All parameters start as standard-normal samples. The draw order
# (w1, w2, b1, b2) fixes which random values each array receives, so it must
# not be rearranged.
w1 = np.random.standard_normal((input_layer_size, hidden_layer_size))
w2 = np.random.standard_normal((hidden_layer_size, output_layer_size))
b1 = np.random.standard_normal((1, hidden_layer_size))
b2 = np.random.standard_normal((1, output_layer_size))

In [None]:
# Full-batch training: every epoch runs one forward pass over all rows of X,
# backpropagates the error, and updates the parameters in place. Because the
# updates use `+=` on the existing arrays, re-running this cell continues
# training from the current weights rather than starting over.
for epoch in range(epochs):
    # Forward pass: input -> hidden layer (ReLU).
    hidden_layer_input = np.dot(X, w1) + b1
    hidden_layer_output = relu(hidden_layer_input)
    
    # Forward pass: hidden -> output layer (sigmoid).
    output_layer_input = np.dot(hidden_layer_output, w2) + b2
    output = sigmoid(output_layer_input)
    
    # Error is target minus prediction. sigmoid_derivative is given the
    # activated output, which is the form x * (1 - x) expects.
    output_error = y - output
    output_delta = output_error * sigmoid_derivative(output)
    
    # Push the output delta back through w2. This must read w2 before it is
    # updated below. relu_derivative receives the post-ReLU values; they are
    # positive exactly where the pre-activation was positive, so the mask is
    # the same.
    hidden_error = output_delta.dot(w2.T)
    hidden_delta = hidden_error * relu_derivative(hidden_layer_output)
    
    # The error is defined as (y - output), so adding these terms moves the
    # parameters in the direction that reduces the squared error.
    w2 += hidden_layer_output.T.dot(output_delta) * learning_rate
    b2 += np.sum(output_delta, axis=0, keepdims=True) * learning_rate
    
    w1 += X.T.dot(hidden_delta) * learning_rate
    b1 += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate
    
    # Report the mean squared error every 1000 epochs. It is computed from
    # this epoch's forward pass, i.e. before the updates just applied.
    if epoch % 1000 == 0:
        loss = np.mean(np.square(output_error))
        print(f'Epoch {epoch}, Loss: {loss}')

Epoch 0, Loss: 0.34314993966841206
Epoch 1000, Loss: 0.017226631270719127
Epoch 2000, Loss: 0.00358728614425571
Epoch 3000, Loss: 0.0018154244198578654
Epoch 4000, Loss: 0.0011814146883300197
Epoch 5000, Loss: 0.0008644144121870219
Epoch 6000, Loss: 0.0006767250459979635
Epoch 7000, Loss: 0.0005535920285729638
Epoch 8000, Loss: 0.0004668650217739234
Epoch 9000, Loss: 0.0004028233772478616


In [None]:
def test_input(input_data):
    """Trace one forward pass through the network and classify the result.

    Prints the input, the module-level parameters ``w1``, ``b1``, ``w2`` and
    ``b2``, and every intermediate value along the way.

    Args:
        input_data: Samples to evaluate, one per row.

    Returns:
        Boolean array that is True where the output layer's pre-activation
        is positive.
    """
    print('input_data:', input_data)
    print('w1:', w1)
    print('b1:', b1)
    hidden_pre_activation = np.dot(input_data, w1) + b1
    print('hidden_layer_input:', hidden_pre_activation)
    hidden_activation = relu(hidden_pre_activation)
    print('hidden_layer_output:', hidden_activation)

    print('w2:', w2)
    print('b2:', b2)
    output_pre_activation = np.dot(hidden_activation, w2) + b2
    print('output_layer_input:', output_pre_activation)
    # No sigmoid is applied here: mathematically, thresholding the
    # pre-activation at 0 gives the same decision as thresholding the sigmoid
    # output at 0.5.
    prediction = output_pre_activation > 0
    print('output:', prediction)

    return prediction

In [None]:
# Probe the network with a single sample, (1, 0), given as a one-row 2-D
# array so it has the same layout as the rows of the training inputs.
test_data = np.array([[1, 0]])
# test_input prints its own trace and returns a boolean array. Note that this
# rebinds the notebook-level name `output`.
output = test_input(test_data)
print(f"Output for input {test_data}: {output}")

input_data: [[1 0]]
w1: [[ 2.34821193 -0.1382643   2.8750358   1.67298283]
 [ 2.34822006 -0.23413696  2.87505677  1.67297815]]
b1: [[-1.18991116e-04 -1.91328024e+00 -2.87503171e+00 -1.67299902e+00]]
hidden_layer_input: [[ 2.34809294e+00 -2.05154455e+00  4.08805594e-06 -1.61916016e-05]]
hidden_layer_output: [[2.34809294e+00 0.00000000e+00 4.08805594e-06 0.00000000e+00]]
w2: [[ 3.29636826]
 [ 0.54256004]
 [-4.36872952]
 [-2.31995674]]
b2: [[-3.48156055]]
output_layer_input: [[4.25860061]]
output: [[ True]]
Output for input [[1 0]]: [[ True]]


In [33]:
# Print each parameter array multiplied by 1000, in the order
# w1, b1, w2, b2, each preceded by its name.
for label, values in (('w1', w1), ('b1', b1), ('w2', w2), ('b2', b2)):
    print(label, values * 1000)

w1 [[2348.21192642 -138.26430117 2875.03579964 1672.98283189]
 [2348.22006149 -234.13695695 2875.05677114 1672.97815421]]
b1 [[-1.18991116e-01 -1.91328024e+03 -2.87503171e+03 -1.67299902e+03]]
w2 [[ 3296.36825866]
 [  542.56004359]
 [-4368.72951991]
 [-2319.95674011]]
b2 [[-3481.56054914]]
