# Simple Neural Network from Scratch


In [1]:
import numpy as np

# activation functions
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    Evaluated in an overflow-safe form: only exp(-|x|) is ever computed, so
    large-magnitude inputs saturate to 0 or 1 instead of triggering a NumPy
    overflow RuntimeWarning.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        The sigmoid of x, with the same shape as x (a NumPy scalar when x is
        a scalar).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # argument is never positive, so exp cannot overflow
    # x >= 0: 1 / (1 + e^-x).  x < 0: e^x / (1 + e^x), the same value rewritten
    # so that the exponent stays non-positive.
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where returns a 0-d array for scalar input; unwrap it to a scalar.
    return out[()] if out.ndim == 0 else out

def sigmoid_derivative(x):
    """Derivative of the sigmoid, expressed in terms of the sigmoid's OUTPUT.

    If s = sigmoid(z), then ds/dz = s * (1 - s). The argument here is s (an
    activation already passed through `sigmoid`), not the pre-activation z;
    the training loop calls this with the activations a1 and a2.

    Args:
        x: sigmoid output(s), scalar or array.

    Returns:
        x * (1 - x), element-wise.
    """
    complement = 1 - x
    return complement * x

In [2]:
# Inputs: every combination of two binary features, one sample per row -> shape (4, 2).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Targets: XOR of the two features for each row of X, as a column vector -> shape (4, 1).
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

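## Initialize Parameters

XOR is not linearly separable, so a single layer cannot learn it; one small hidden layer is enough. Let’s use:

- 2 input neurons (one per input feature)

- 2 hidden neurons

- 1 output neuron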

In [3]:
# Fixed seed so the initial weights -- and therefore the training log -- are
# the same on every run.
np.random.seed(42)

# Layer sizes: 2 input features -> 2 hidden units -> 1 output.
input_neurons = 2
hidden_neurons = 2
output_neurons = 1

# Weights and biases drawn uniformly from [0, 1).  The draw order
# (W1, b1, W2, b2) determines which values each array receives from the
# seeded generator, so keep it unchanged to reproduce the same start point.
W1 = np.random.uniform(size=(input_neurons, hidden_neurons))   # input  -> hidden
b1 = np.random.uniform(size=(1, hidden_neurons))               # one bias per hidden unit
W2 = np.random.uniform(size=(hidden_neurons, output_neurons))  # hidden -> output
b2 = np.random.uniform(size=(1, output_neurons))               # one bias per output unit

# Step size used for every parameter update in the training loop.
learning_rate = 0.1

In [5]:
n_epochs = 10000   # number of full-batch gradient steps
log_every = 1000   # print the loss once every this many epochs

for epoch in range(n_epochs):
    # ---- Forward Pass ----
    z1 = np.dot(X, W1) + b1   # hidden pre-activations; b1 broadcasts over the rows of X
    a1 = sigmoid(z1)          # hidden activations

    z2 = np.dot(a1, W2) + b2  # output pre-activations
    a2 = sigmoid(z2)          # prediction

    # ---- Compute Loss (Mean Squared Error) ----
    loss = np.mean((y - a2)**2)

    # ---- Backward Pass ----
    # Gradient of the loss w.r.t. the output.  The exact derivative of the
    # mean squared error is 2 * (a2 - y) / N; the constant factor 2 / N is
    # left out here, which only rescales the effective learning rate.
    d_a2 = a2 - y
    # sigmoid_derivative takes the activation (the sigmoid output), not z.
    d_z2 = d_a2 * sigmoid_derivative(a2)

    dW2 = np.dot(a1.T, d_z2)
    db2 = np.sum(d_z2, axis=0, keepdims=True)  # sum over samples, keep the (1, n) bias shape

    # Propagate the error back through W2 into the hidden layer.
    d_a1 = np.dot(d_z2, W2.T)
    d_z1 = d_a1 * sigmoid_derivative(a1)

    dW1 = np.dot(X.T, d_z1)
    db1 = np.sum(d_z1, axis=0, keepdims=True)

    # ---- Gradient Descent Update ----
    # Step against the gradient.  The updates are in place, so re-running this
    # cell continues training from the current weights rather than restarting.
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

    # (optional) print progress
    if epoch % log_every == 0:
        print(f"Epoch {epoch} | Loss: {loss:.4f}")

# Note: after the loop, `a2` and `loss` come from the last epoch's forward
# pass, i.e. they were computed just before that epoch's weight update.

Epoch 0 | Loss: 0.3247
Epoch 1000 | Loss: 0.2406
Epoch 2000 | Loss: 0.1960
Epoch 3000 | Loss: 0.1207
Epoch 4000 | Loss: 0.0305
Epoch 5000 | Loss: 0.0125
Epoch 6000 | Loss: 0.0074
Epoch 7000 | Loss: 0.0051
Epoch 8000 | Loss: 0.0038
Epoch 9000 | Loss: 0.0031


In [6]:
# Show the network output for each of the four inputs, rounded to 3 decimals.
# `a2` is the prediction array left over from the training loop's last epoch.
print("\nFinal predictions:", a2.round(3), sep="\n")


Final predictions:
[[0.053]
 [0.952]
 [0.952]
 [0.052]]


# Building the XOR Neural Network with Keras


In [7]:
import numpy as np
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import set_random_seed

In [8]:
# Input data: the four possible pairs of binary inputs, one sample per row.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Target output: XOR of each input pair, one row per sample.
y = np.array([[0], [1], [1], [0]])

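## Build the Model

We’ll use:

- 2 input features

- 2 neurons in the hidden layer (with ReLU activation)

- 1 neuron in the output layer (with Sigmoid activation)

Note: with only two ReLU hidden units, training on XOR is sensitive to the random initial weights. A unit whose output is zero for every sample receives no gradient, so some runs get stuck (for example at 75% accuracy). If that happens, re-initialize the model, or use a few more hidden units or a `tanh` hidden activation.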

In [11]:
# Seed the Python, NumPy and TensorFlow random generators before the layers
# are created, so the initial weights (and hence the training outcome) do not
# change from one fresh run of the notebook to the next.
set_random_seed(42)

# 2 input features -> 2 ReLU hidden units -> 1 sigmoid output.
# The input shape is declared with an explicit Input object rather than the
# deprecated `input_dim=` argument on the first Dense layer.
model = Sequential([
    Input(shape=(2,)),
    Dense(2, activation='relu'),
    Dense(1, activation='sigmoid')
])

Loss function: measures error (binary_crossentropy for binary output)

Optimizer: algorithm that updates weights (SGD here)

Metrics: what to monitor (accuracy)

In [15]:
# Configure training: binary cross-entropy loss for the single sigmoid output,
# plain SGD with a fixed learning rate, and accuracy reported as a metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=SGD(learning_rate=0.3),
    metrics=['accuracy'],
)

In [16]:
# Train on the four XOR samples without per-epoch console output; the object
# returned by fit() is kept in `history` for later inspection.
history = model.fit(
    x=X,
    y=y,
    epochs=10000,
    verbose=0,
)

In [14]:
# Evaluate final performance on the same four samples the model was trained on
loss, accuracy = model.evaluate(X, y, verbose=0)
print(f"Final Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

# Predictions (verbose=0 keeps the Keras progress bar out of the cell output)
predictions = model.predict(X, verbose=0)
print("\nPredictions:")
print(np.round(predictions, 3))

Final Loss: 0.4775, Accuracy: 0.7500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step

Predictions:
[[0.333]
 [1.   ]
 [0.333]
 [0.333]]
