In [80]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

In [82]:
# Load dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target.reshape(-1, 1)  # reshape y to be a column vector

In [84]:
# Normalize features manually (mean=0, std=1)
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X_norm = (X - X_mean) / X_std

In [86]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, random_state=42)

In [88]:
# Initialize parameters
input_size = 10
hidden_size = 8
output_size = 1
lr = 0.01
epochs = 100

In [90]:
# Initialize weights and biases
W1 = np.random.randn(input_size, hidden_size) 
b = np.random.randn()
W2 = np.random.randn(hidden_size, output_size) 


In [92]:
# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [94]:
# MSE loss function to calculate loss
def loss(y_hat_1, y1, y_hat_2, y2):
    return 0.5 * ((y_hat_1 - y1) ** 2 + (y_hat_2 - y2) ** 2)

In [96]:
# Relu function
def relu(x):
    return x if x > 0 else 0

In [98]:
# Lists to store loss
loss = []

In [100]:
# Training loop
for epoch in range(epochs):
    # Forward propagation
    Z1 = X_train @ W1 + b1
    A1 = sigmoid(Z1)
    Z2 = A1 @ W2 + b2  # Linear output
    y_pred = Z2


In [102]:
# # Forward propagation function
# def forward_propagation(inputs, weights, bias, y_values):
#     w1, w2, w3, w4, w5, w6, w7, w8 = weights
#     b1, b2 = bias
#     i1, i2 = inputs
#     y1, y2 = y_values

#     # Hidden layer
#     h1 = i1 * w1 + i2 * w3 + b1
#     h1_out = relu(h1)
#     h2 = i1 * w2 + i2 * w4 + b1
#     h2_out = relu(h2)

#     # Output layer
#     o1 = h1_out * w5 + h2_out * w7 + b2
#     o1_out = sigmoid(o1)
#     o2 = h1_out * w6 + h2_out * w8 + b2
#     o2_out = sigmoid(o2)

#     print("\n\t-Forward Propagation-")
#     print("\n")
#     print("h1: ", h1, "\th1_out: ", h1_out.round(5))
#     print("h2: ", h2, "\th2_out: ", h2_out.round(5))
#     print("o1: ", o1, "\to1_out: ", o1_out.round(5))
#     print("o2: ", o2, "\to2_out: ", o2_out.round(5))
#     print("\nCurrent Loss: ", loss(o1_out, y1, o2_out, y2).round(5))

#     hidden_layers = (h1_out, h2_out)
#     output_layers = (o1_out, o2_out)
#     backward_propagation(inputs, weights, bias, y_values, hidden_layers, output_layers)

In [104]:
# # Backward propagation function
# def backward_propagation(inputs, weights, bias, y_values, hidden_layers, output_layers):
#     w1, w2, w3, w4, w5, w6, w7, w8 = weights
#     b1, b2 = bias
#     i1, i2 = inputs
#     y1, y2 = y_values
#     h1_out, h2_out = hidden_layers
#     o1_out, o2_out = output_layers
#     lr = 0.5

#     # Output layer deltas
#     delta_o1 = (o1_out - y1) * o1_out * (1 - o1_out)
#     delta_o2 = (o2_out - y2) * o2_out * (1 - o2_out)

#     # Hidden layer deltas
#     delta_h1 = (delta_o1 * w5 + delta_o2 * w6) * h1_out * (1 - h1_out)
#     delta_h2 = (delta_o1 * w7 + delta_o2 * w8) * h2_out * (1 - h2_out)

#     # Gradients for weights
#     new_w5 = w5 - lr * delta_o1 * h1_out
#     new_w7 = w7 - lr * delta_o1 * h2_out
#     new_w6 = w6 - lr * delta_o2 * h1_out
#     new_w8 = w8 - lr * delta_o2 * h2_out
#     new_w1 = w1 - lr * delta_h1 * i1
#     new_w3 = w3 - lr * delta_h1 * i2
#     new_w2 = w2 - lr * delta_h2 * i1
#     new_w4 = w4 - lr * delta_h2 * i2

#     # Gradients for biases
#     new_b1 = b1 - lr * (delta_h1 + delta_h2)
#     new_b2 = b2 - lr * (delta_o1 + delta_o2)

#     print("\n\t-Backward Propagation-")
#     print("\n")
#     print("old w1: ", w1, "\tnew w1: ", new_w1.round(5))
#     print("old w2: ", w2, "\tnew w2: ", new_w2.round(5))
#     print("old w3: ", w3, "\tnew w3: ", new_w3.round(5))
#     print("old w4: ", w4, "\tnew w4: ", new_w4.round(5))
#     print("old w5: ", w5, "\tnew w5: ", new_w5.round(5))
#     print("old w6: ", w6, "\tnew w6: ", new_w6.round(5))
#     print("old w7: ", w7, "\tnew w7: ", new_w7.round(5))
#     print("old w8: ", w8, "\tnew w8: ", new_w8.round(5))
#     print("old b1: ", b1, "\tnew b1: ", new_b1.round(5))
#     print("old b2: ", b2, "\tnew b2: ", new_b2.round(5))

#     new_weights = (new_w1, new_w2, new_w3, new_w4, new_w5, new_w6, new_w7, new_w8)
#     new_bias = (new_b1, new_b2)

#     decision = input("Please push Y to calculate one more step.\n")
#     if decision.lower() == 'y':
#         forward_propagation(inputs, new_weights, new_bias, y_values)

In [106]:
# # Start the process
# forward_propagation(inputs, weights, bias, y_values)

In [108]:
# Activation functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

# Loss function
def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

for epoch in range(epochs):
    # Forward propagation
    z1 = np.dot(X_train, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2  # Output layer
    y_pred = z2

    # Compute loss
    loss = mean_squared_error(y_train, y_pred)
    loss_curve.append(loss)

    # Backward propagation
    d_loss_y_pred = 2 * (y_pred - y_train) / y_train.shape[0]
    dW2 = np.dot(a1.T, d_loss_y_pred)
    db2 = np.sum(d_loss_y_pred, axis=0, keepdims=True)

    da1 = np.dot(d_loss_y_pred, W2.T)
    dz1 = da1 * sigmoid_derivative(z1)
    dW1 = np.dot(X_train.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)

    # Update weights and biases
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

# Test evaluation
z1_test = np.dot(X_test, W1) + b1
a1_test = sigmoid(z1_test)
z2_test = np.dot(a1_test, W2) + b2
y_test_pred = z2_test

test_mse = mean_squared_error(y_test, y_test_pred)
loss_curve[-5:], test_mse

([2645.771027479956,
  2642.313206481629,
  2638.9713377609232,
  2635.7384289015877,
  2632.6085836345915],
 2756.173932717753)