In [27]:
import numpy as np

  # Initialize the Network

In [28]:
input_size = 1 
hidden_size = 10  
output_size = 1

# Initialize weights and biases

In [29]:
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size) * 0.01  
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size) * 0.01 
b2 = np.zeros((1, output_size))

# Forward Propagation

In [30]:
def relu(Z):
    return np.maximum(0, Z)

In [31]:
def forward(X):
    Z1 = np.dot(X, W1) + b1
    A1 = relu(Z1)
    Z2 = np.dot(A1, W2) + b2
    return Z2, A1

# Loss Calculation (MSE)

In [32]:
def mse_loss(Y, Y_pred):
    return np.mean((Y - Y_pred) ** 2)

In [33]:
def gradient_descent(params, grads, learning_rate):
    for param, grad in zip(params, grads):
        param -= learning_rate * grad

# Backward Propagation

In [34]:
def backward(X, Y, Y_pred, A1, learning_rate=0.01):
    global W1, b1, W2, b2
    
    m = X.shape[0] 

    dZ2 = Y_pred - Y  
    dW2 = np.dot(A1.T, dZ2) / m  
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m
    
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * (A1 > 0)  
    dW1 = np.dot(X.T, dZ1) / m  
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m 
    params = [W1, b1, W2, b2]
    grads = [dW1, db1, dW2, db2]
    
    gradient_descent(params, grads, learning_rate)


    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2


#  Training the Network

In [35]:
def train(X, Y, epochs=1000, learning_rate=0.01):
    for i in range(epochs):
        Y_pred, A1 = forward(X)  
        loss = mse_loss(Y, Y_pred)  
        backward(X, Y, Y_pred, A1, learning_rate) 
        
        if i % 100 == 0:
            print(f"Epoch {i}: Loss = {loss:.4f}")

# Sample Data

In [36]:
X = np.array([[1], [2], [3], [4], [5]])  
Y = np.array([[2], [4], [6], [8], [10]])

# Train

In [37]:
train(X,Y, epochs= 1000, learning_rate = 0.01)

Epoch 0: Loss = 44.0225
Epoch 100: Loss = 0.1467
Epoch 200: Loss = 0.0163
Epoch 300: Loss = 0.0014
Epoch 400: Loss = 0.0001
Epoch 500: Loss = 0.0000
Epoch 600: Loss = 0.0000
Epoch 700: Loss = 0.0000
Epoch 800: Loss = 0.0000
Epoch 900: Loss = 0.0000


# Test

In [38]:
Y_pred, _ = forward(X)
print("Predicted:", Y_pred)

Predicted: [[2.00000796]
 [4.00000496]
 [6.00000197]
 [7.99999897]
 [9.99999598]]


# Loss Functions

## MAE

In [39]:
def mae_loss(Y, Y_pred):
    return np.mean(np.abs(Y - Y_pred))
MAE = mae_loss(Y,Y_pred)
display(MAE)

3.986958279522668e-06

## Cross-Entropy Loss

In [40]:
def cross_entropy_loss(Y, Y_pred):
    epsilon = 1e-12  
    Y_pred = np.clip(Y_pred, epsilon, 1. - epsilon)  
    return -np.mean(Y * np.log(Y_pred) + (1 - Y) * np.log(1 - Y_pred))
CEL = cross_entropy_loss(Y, Y_pred)
display(Y,Y_pred)

array([[ 2],
       [ 4],
       [ 6],
       [ 8],
       [10]])

array([[2.00000796],
       [4.00000496],
       [6.00000197],
       [7.99999897],
       [9.99999598]])

## Hinge Loss

In [41]:
def hinge_loss(Y, Y_pred):
    return np.mean(np.maximum(0, 1 - Y * Y_pred))
HL = hinge_loss(Y, Y_pred)
display(HL)

0.0