In [2]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, Binarizer

# Activation functions and derivatives
sigmoid = lambda x: 1 / (1 + np.exp(-x))
relu = lambda x: np.maximum(0, x)
relu_derivative = lambda x: np.where(x > 0, 1, 0)

# Load and preprocess data
X, y = load_diabetes(return_X_y=True)
y = Binarizer(threshold=np.median(y)).fit_transform(y.reshape(-1, 1))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test = StandardScaler().fit_transform(X_train), StandardScaler().fit_transform(X_test)

# Initialize parameters
W1, b1 = np.random.randn(X_train.shape[1], 4), np.zeros((1, 4))
W2, b2 = np.random.randn(4, 1), np.zeros((1, 1))
lr, epochs = 0.01, 10000

# Training loop
for _ in range(epochs):
    H = relu(X_train @ W1 + b1)
    y_pred = sigmoid(H @ W2 + b2)
    loss = -np.mean(y_train * np.log(y_pred + 1e-8) + (1 - y_train) * np.log(1 - y_pred + 1e-8))

    # Backpropagation
    dW2, db2 = H.T @ (y_pred - y_train), np.sum(y_pred - y_train, axis=0, keepdims=True)
    dW1 = X_train.T @ ((y_pred - y_train) @ W2.T * relu_derivative(H))
    db1 = np.sum((y_pred - y_train) @ W2.T * relu_derivative(H), axis=0, keepdims=True)

    # Update weights
    W1, b1, W2, b2 = W1 - lr * dW1, b1 - lr * db1, W2 - lr * dW2, b2 - lr * db2

    if _ % 1000 == 0:
        print(f"Epoch {_}, Loss: {loss:.4f}")

# Evaluation
accuracy = np.mean((sigmoid(relu(X_test @ W1 + b1) @ W2 + b2) > 0.5).astype(int) == y_test)
print(f"Test Accuracy: {accuracy:.2f}")

Epoch 0, Loss: 1.4011
Epoch 1000, Loss: 0.4206
Epoch 2000, Loss: 0.4219
Epoch 3000, Loss: 0.3890
Epoch 4000, Loss: 0.3799
Epoch 5000, Loss: 0.3849
Epoch 6000, Loss: 0.3838
Epoch 7000, Loss: 0.3825
Epoch 8000, Loss: 0.3758
Epoch 9000, Loss: 0.3776
Test Accuracy: 0.75
