In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [None]:
# Load the Iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target.reshape(-1, 1)  # Labels (reshape to make it a column vector)

In [None]:
# One-Hot Encode the target variable
encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit_transform(y)

In [None]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [86]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)


In [88]:
# Initialize neural network parameters
input_size = X.shape[1]  # 4 features
hidden_size = 6          # 6 neurons in hidden layer
output_size = y_onehot.shape[1]  # 3 classes (setosa, versicolor, virginica)
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

In [89]:
# Sigmoid activation function and derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

In [93]:
# Define hyperparameters
epochs = 5000
learning_rate = 0.01  # Define the learning rate

# Training loop
for epoch in range(epochs):
    # **Forward Propagation**
    hidden_layer_input = np.dot(X_train, W1) + b1
    hidden_layer_output = sigmoid(hidden_layer_input)

    output_layer_input = np.dot(hidden_layer_output, W2) + b2
    predicted_output = sigmoid(output_layer_input)

    # **Compute Error**
    error = y_train - predicted_output

    # **Backward Propagation**
    d_output = error * sigmoid_derivative(predicted_output)
    error_hidden = d_output.dot(W2.T)
    d_hidden = error_hidden * sigmoid_derivative(hidden_layer_output)

    # **Update Weights and Biases**
    W2 += hidden_layer_output.T.dot(d_output) * learning_rate
    b2 += np.sum(d_output, axis=0, keepdims=True) * learning_rate
    W1 += X_train.T.dot(d_hidden) * learning_rate
    b1 += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

    # Print loss every 500 epochs
    if epoch % 500 == 0:
        loss = np.mean(np.abs(error))
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 0, Loss: 0.4231
Epoch 500, Loss: 0.1090
Epoch 1000, Loss: 0.0696
Epoch 1500, Loss: 0.0564
Epoch 2000, Loss: 0.0494
Epoch 2500, Loss: 0.0450
Epoch 3000, Loss: 0.0419
Epoch 3500, Loss: 0.0396
Epoch 4000, Loss: 0.0378
Epoch 4500, Loss: 0.0364
