In [1]:
import numpy as np

In [2]:
# Define the activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

In [3]:
# Define the Mean Squared Error loss function and its derivative
def mse_loss(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def mse_loss_derivative(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size

In [4]:
# Initialize the network parameters
input_size = 1
hidden_size = 10
output_size = 1

In [5]:
# Weights and biases
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

In [6]:
# Forward pass
def forward(X):
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    y_pred = z2  # For regression, we use a linear activation function in the output layer
    return y_pred, a1

In [12]:
# Backward pass
def backward(X, y_true, y_pred, a1, learning_rate=0.01):
    global W1, b1, W2, b2
    # Calculate the gradient of the loss with respect to output
    dL_dy_pred = mse_loss_derivative(y_true, y_pred)

    # Calculate the gradients for the second layer
    dL_dW2 = np.dot(a1.T, dL_dy_pred)
    dL_db2 = np.sum(dL_dy_pred, axis=0, keepdims=True)
    
    # Calculate the gradients for the first layer
    dL_da1 = np.dot(dL_dy_pred, W2.T)
    dL_dz1 = dL_da1 * sigmoid_derivative(a1)
    dL_dW1 = np.dot(X.T, dL_dz1)
    dL_db1 = np.sum(dL_dz1, axis=0, keepdims=True)
    
    # Update weights and biases
    W1 -= learning_rate * dL_dW1
    b1 -= learning_rate * dL_db1
    W2 -= learning_rate * dL_dW2
    b2 -= learning_rate * dL_db2


    

In [13]:
# Training the network
def train(X, y, epochs=1000, learning_rate=0.01):
    for epoch in range(epochs):
        y_pred, a1 = forward(X)
        loss = mse_loss(y, y_pred)
        backward(X, y, y_pred, a1, learning_rate)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")

In [14]:
# Generate some sample data for training
X = np.array([[i] for i in range(-50, 50)])
y = 2 * X + 1  # Linear relation with some noise

In [15]:
# Train the neural network
train(X, y, epochs=1000, learning_rate=0.01)

Epoch 0, Loss: 3489.6112216628057
Epoch 100, Loss: 832.5406784502022
Epoch 200, Loss: 808.7303787543738
Epoch 300, Loss: 811.0401341562782
Epoch 400, Loss: 808.6851566107373
Epoch 500, Loss: 810.1347161310671
Epoch 600, Loss: 809.2053937976522
Epoch 700, Loss: 808.9828408843038
Epoch 800, Loss: 808.8530675169567
Epoch 900, Loss: 808.7612930426337


  return 1 / (1 + np.exp(-x))


In [16]:
# Predict and evaluate
y_pred, _ = forward(X)
print("Predictions:", y_pred)

Predictions: [[-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492757]
 [-50.01492758]
 [-50.01492772]
 [-50.01492915]
 [-50.0149446 ]
 [-50.01512283]
 [-50.01799915]
 [-50.10349641]
 [  1.00493097]
 [ 51.13383746]
 [ 51.03847677]
 [ 51.03551462]
 [ 51.0353331 ]
 [ 51.03531739]
 [ 51.03531593]
 [ 51.03531579]
 [ 51.03531578]
 [ 51.03531578]
 [ 51.03531578]
 [ 51.03531

  return 1 / (1 + np.exp(-x))
