In [21]:
import numpy as np

# --- Activation Functions ---
def sigmoid(z):
    """Sigmoid activation function (for the output layer).

    Computes ``1 / (1 + exp(-z))`` element-wise without overflowing.

    The textbook formula evaluates ``exp(-z)``, which overflows to ``inf``
    (and emits a RuntimeWarning) once ``z`` is a large negative number.
    Here only ``exp(-|z|)`` is ever evaluated, which always lies in (0, 1],
    and the algebraically equivalent form is picked per element:

    * ``z >= 0``: ``1 / (1 + exp(-z))``
    * ``z <  0``: ``exp(z) / (1 + exp(z))``

    Args:
        z: A scalar or array-like of pre-activation values.

    Returns:
        The sigmoid of ``z``, in the open interval (0, 1) up to floating
        point rounding. Scalar input yields a NumPy scalar; array input
        yields an array of the same shape.
    """
    z = np.asarray(z)
    # exp of a non-positive number cannot overflow.
    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of any other rank unchanged.
    return result[()]

def sigmoid_derivative(z):
    """Slope of the sigmoid, written in terms of the sigmoid's output.

    Evaluates ``z * (1 - z)``. This equals the sigmoid's derivative only
    when ``z`` is already an activation value ``sigmoid(x)``; passing the
    raw pre-activation ``x`` does not give the derivative.

    Args:
        z: Sigmoid output(s), scalar or array.

    Returns:
        ``z * (1 - z)``, element-wise.
    """
    complement = 1 - z
    return z * complement

def relu(z):
    """Rectified linear unit, used as the hidden-layer activation.

    Args:
        z: Pre-activation value(s), scalar or array.

    Returns:
        The element-wise maximum of 0 and ``z``: negative entries become 0,
        everything else passes through unchanged.
    """
    floor = 0
    return np.maximum(floor, z)

def relu_derivative(z):
    """Slope of ReLU with respect to its input.

    Args:
        z: Pre-activation value(s), scalar or array.

    Returns:
        An integer mask: 1 where ``z`` is strictly positive, 0 elsewhere.
        The slope at exactly ``z == 0`` is therefore taken to be 0.
    """
    is_positive = z > 0
    return np.where(is_positive, 1, 0)

# --- 1. Load the Prepared Data ---
X = np.load('features_X.npy')
Y = np.load('labels_Y.npy')

# The network below emits one output column per sample. A flat (n,) label
# vector would not fail against (n, 1) predictions: it would silently
# broadcast into an (n, n) error matrix. Force labels into a column up front.
if Y.ndim == 1:
    Y = Y.reshape(-1, 1)
if X.ndim != 2 or X.shape[0] != Y.shape[0]:
    raise ValueError(
        f"Expected X of shape (n_samples, n_features) and one label row per "
        f"sample, got X{X.shape} and Y{Y.shape}"
    )

# --- 2. Define the Neural Network Architecture ---
# One hidden layer; the input width is taken from the feature matrix.
input_neurons = X.shape[1]
hidden_neurons = 64
output_neurons = 1

# --- 3. Initialize Weights and Biases ---
# Small zero-mean random weights (randn scaled by 0.01) keep the initial
# pre-activations near 0, which helps prevent sigmoid saturation.
# Biases start at zero.
wh = np.random.randn(input_neurons, hidden_neurons) * 0.01
bh = np.zeros((1, hidden_neurons))
wout = np.random.randn(hidden_neurons, output_neurons) * 0.01
bout = np.zeros((1, output_neurons))

# --- 4. Train the Network ---
# Full-batch gradient descent: every epoch uses all of X at once.
learning_rate = 0.1
epochs = 1000


def _forward_pass(inputs):
    """Push `inputs` through the network using the current weights.

    Reads the module-level parameters (wh, bh, wout, bout) at call time, so
    it always reflects the latest in-place updates.

    Returns:
        A 4-tuple: hidden pre-activation, hidden ReLU activation,
        output pre-activation, and sigmoid prediction.
    """
    hidden_in = np.dot(inputs, wh) + bh
    hidden_out = relu(hidden_in)
    output_in = np.dot(hidden_out, wout) + bout
    return hidden_in, hidden_out, output_in, sigmoid(output_in)


for i in range(epochs):

    # --- Forward Propagation ---
    # Input -> hidden (ReLU) -> output (sigmoid)
    (hidden_layer_input, hidden_layer_activations,
     output_layer_input, predicted_output) = _forward_pass(X)

    # --- Backward Propagation ---
    # `error` points from the prediction toward the label, which is why the
    # parameter updates below are additive.
    error = Y - predicted_output

    # Output-layer delta; sigmoid_derivative is given the activation itself.
    d_predicted_output = error * sigmoid_derivative(predicted_output)

    # Hidden-layer delta; the ReLU slope is evaluated at the pre-activation.
    error_hidden_layer = d_predicted_output.dot(wout.T)
    d_hidden_layer = error_hidden_layer * relu_derivative(hidden_layer_input)

    # --- Update Weights and Biases ---
    # NOTE: these products sum over all samples rather than averaging, so the
    # effective step size grows with the number of rows in X.
    wout += hidden_layer_activations.T.dot(d_predicted_output) * learning_rate
    bout += np.sum(d_predicted_output, axis=0, keepdims=True) * learning_rate
    wh += X.T.dot(d_hidden_layer) * learning_rate
    bh += np.sum(d_hidden_layer, axis=0, keepdims=True) * learning_rate

    # Print the loss to see the progress (mean squared error of the
    # predictions made before this epoch's update).
    if i % 100 == 0:
        loss = np.mean(np.square(Y - predicted_output))
        print(f"Epoch {i}, Loss: {loss:.6f}")

# The activations left over from the last iteration were computed before its
# weight update. Run one more forward pass so that `predicted_output` (and
# the other intermediates) match the final trained weights.
(hidden_layer_input, hidden_layer_activations,
 output_layer_input, predicted_output) = _forward_pass(X)


# --- 5. Display Final Predictions ---
# Print the network's outputs followed by the ground-truth labels so the two
# can be compared by eye.
print("\n--- Training Complete ---")
print("Final Predicted Output:", predicted_output, sep="\n")
print("\nActual Labels:", Y, sep="\n")
print("\nThe predictions should now be much closer to the actual labels!")

Epoch 0, Loss: 0.250276
Epoch 100, Loss: 0.064196
Epoch 200, Loss: 0.028601
Epoch 300, Loss: 0.016929
Epoch 400, Loss: 0.011642
Epoch 500, Loss: 0.008731
Epoch 600, Loss: 0.006921
Epoch 700, Loss: 0.005698
Epoch 800, Loss: 0.004823
Epoch 900, Loss: 0.004168

--- Training Complete ---
Final Predicted Output:
[[0.08469835]
 [0.98751627]]

Actual Labels:
[[0]
 [1]]

The predictions should now be much closer to the actual labels!
