In [19]:
import numpy as np


In [21]:
class NeuralNetwork:
    """Two-layer fully connected network: ReLU hidden layer, sigmoid output.

    The parameters are plain NumPy arrays stored on the instance
    (``weights1``, ``bias1``, ``weights2``, ``bias2``). ``forward`` caches the
    intermediate activations on ``self`` and ``backward`` consumes them to
    perform one gradient-descent step, so the two must be called as a pair.
    """

    def __init__(self, inputSize, hiddenSize, outputSize):
        """Create the parameters for an inputSize -> hiddenSize -> outputSize net.

        Weights are drawn from NumPy's global RNG (``np.random.randn``) and
        scaled by ``sqrt(2 / fan_in)``; biases start at zero. Seed
        ``np.random`` beforehand if reproducible weights are needed.
        """
        self.weights1 = np.random.randn(inputSize, hiddenSize) * np.sqrt(2. / inputSize)
        self.bias1 = np.zeros((1, hiddenSize))
        self.weights2 = np.random.randn(hiddenSize, outputSize) * np.sqrt(2. / hiddenSize)
        self.bias2 = np.zeros((1, outputSize))

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)), element-wise.

        Implemented through ``exp(-|x|)``, which always lies in (0, 1], so the
        computation cannot overflow however large ``|x|`` becomes.
        """
        x = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same value algebraically.
        return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    def sigmoid_derivative(self, x):
        """Return ``x * (1 - x)``.

        This equals the sigmoid's slope only when ``x`` is already a sigmoid
        *output* (an activation), not a pre-activation. The method is not
        called anywhere else in this class.
        """
        return x * (1 - x)

    def relu(self, x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """Return 0 where ``x <= 0`` and 1 elsewhere (element-wise)."""
        return np.where(x <= 0, 0, 1)

    def forward(self, X):
        """Run a forward pass and return the sigmoid output.

        Caches ``z1``, ``a1``, ``z2`` and ``a2`` on the instance; ``backward``
        reads them, so it must follow a ``forward`` call on the same ``X``.
        """
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = self.relu(self.z1)
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, learningRate=0.01):
        """Apply one gradient-descent update using the cached forward pass.

        Args:
            X: the inputs that were given to the most recent ``forward`` call.
            y: targets, combined element-wise with ``self.a2``.
            learningRate: step size applied to every parameter.

        The parameters are updated in place; nothing is returned.
        """
        m = X.shape[0]
        # For a sigmoid output trained with the cross-entropy in compute_loss,
        # the gradient w.r.t. z2 reduces to (prediction - target).
        dz2 = self.a2 - y
        dw2 = (1 / m) * np.dot(self.a1.T, dz2)
        db2 = (1 / m) * np.sum(dz2, axis=0, keepdims=True)
        # The ReLU slope is taken at the pre-activation z1. This yields the
        # same mask as evaluating it on a1, since a1 > 0 exactly when z1 > 0.
        # dz1 must use weights2 *before* it is updated below.
        dz1 = np.dot(dz2, self.weights2.T) * self.relu_derivative(self.z1)
        dw1 = (1 / m) * np.dot(X.T, dz1)
        db1 = (1 / m) * np.sum(dz1, axis=0, keepdims=True)

        self.weights2 -= learningRate * dw2
        self.bias2 -= learningRate * db2
        self.weights1 -= learningRate * dw1
        self.bias1 -= learningRate * db1

    def compute_loss(self, y_true, y_pred, eps=1e-12):
        """Return the binary cross-entropy summed over entries, divided by the row count.

        Args:
            y_true: targets.
            y_pred: predicted probabilities.
            eps: predictions are clipped to ``[eps, 1 - eps]`` before taking
                logs. Without this, a prediction that saturates to exactly 0
                or 1 produces ``0 * log(0) = nan`` and poisons the loss.
        """
        m = y_true.shape[0]
        y_pred = np.clip(y_pred, eps, 1 - eps)
        loss = -1/m * np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss

In [22]:
# Layer sizes for the XOR network.
inputSize = 2
hiddenSize1 = 4

outputSize = 1

# XOR truth table: row k of `outputs` is the label for row k of `options`.
options = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
outputs = np.array([[0], [1], [1], [0]])

# Seed NumPy's global RNG so the sampled rows -- and any later draws from
# np.random, such as the weight initialisation in NeuralNetwork -- are the
# same on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Generate random training data: 1000 row indices drawn with replacement,
# then used to pick matching (input, label) pairs from the truth table.
i = np.random.choice(4, size=1000)
X = options[i]
y = outputs[i]

In [23]:
# Display the sampled training inputs (each row is one row of `options`).
X

array([[1, 0],
       [1, 1],
       [0, 0],
       ...,
       [1, 1],
       [1, 0],
       [0, 0]])

In [31]:
# Build the network from the size constants declared alongside the training
# data rather than repeating the literals 2, 4, 1, so the two cannot drift apart.
model = NeuralNetwork(inputSize, hiddenSize1, outputSize)



In [25]:
# Display the current first-layer weights (shape: inputSize x hiddenSize).
model.weights1

array([[ 0.08971984, -0.39763865, -0.4518132 , -2.59569674],
       [ 0.16430843, -0.44430535,  0.40342063,  0.48815009]])

In [26]:
# Display the current second-layer weights (shape: hiddenSize x outputSize).
model.weights2

array([[ 1.28464508],
       [-1.96387672],
       [-0.28060806],
       [ 0.0180957 ]])

In [27]:
# Display the current hidden-layer bias (initialised to zeros in __init__).
model.bias1

array([[0., 0., 0., 0.]])

In [33]:
# Number of full passes over X made each time this cell runs.
# NOTE(review): the recorded output below lists many loss values and ends in a
# KeyboardInterrupt, which a single epoch cannot produce -- confirm the epoch
# count actually used. Re-running this cell keeps training the same `model`.
epochs = 1

for epoch in range(epochs):

    # Full-batch step: forward on all of X, then one update with backward's
    # default learningRate (0.01).
    output = model.forward(X)
    model.backward(X,y)

    # `output` was computed before the update above, so this is the loss of
    # the weights as they were at the start of the epoch.
    print(model.compute_loss(y,output))




0.0002985488032357638
0.00029854853808503366
0.0002985470398429492
0.0002985467629169476
0.00029854702901731446
0.000298546779792259
0.0002985455772850616
0.0002985449137825175
0.0002985439046806565
0.0002985430479118026
0.00029854396844950263
0.00029854306480927515
0.00029854244211116083
0.00029854119890665876
0.0002985407695287392
0.0002985393331432138
0.00029854090769375173
0.00029853948135986354
0.00029853917564019483
0.0002985378088306816
0.00029853730976001487
0.00029853614401023436
0.000298537321652937
0.00029853634636204475
0.00029853546070875045
0.00029853467385487775
0.00029853359493575265
0.0002985330821912378
0.00029853360845168977
0.00029853321137454327
0.00029853174601366127
0.00029853153888938905
0.0002985300408168813
0.0002985298597173187
0.00029853024907449934
0.0002985297227472983
0.0002985285784071295
0.0002985278569648058
0.00029852690598999485
0.00029852621826847666
0.0002985271158144965
0.0002985260081509985
0.00029852544356851987
0.0002985241424756279
0.000298523

KeyboardInterrupt: 

In [34]:
# Evaluate the network once on each of the four possible input pairs;
# the printed rows follow the order [0,0], [0,1], [1,0], [1,1].
truth_table_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
truth_table_predictions = model.forward(truth_table_inputs)
print(truth_table_predictions)

[[9.07296049e-04]
 [9.99897167e-01]
 [9.99899222e-01]
 [6.58205199e-05]]
