In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# ------------------------ Data Preprocessing ------------------------ #
iris = load_iris()
X = iris.data  # Features
y = iris.target.reshape(-1, 1)  # Labels (0, 1, 2)

# One-hot encode the target labels
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y)

# Standardize input features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# ------------------------ Activation Functions ------------------------ #
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def softmax(z):
    exps = np.exp(z - np.max(z, axis=1, keepdims=True))  # Stability
    return exps / np.sum(exps, axis=1, keepdims=True)

# ------------------------ Loss Function ------------------------ #
def cross_entropy(predictions, targets):
    return -np.mean(np.sum(targets * np.log(predictions + 1e-9), axis=1))

# ------------------------ Initialization ------------------------ #
input_size = X.shape[1]       # 4 features
hidden_size = 100             # as per instruction
output_size = 3               # 3 classes

np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size) * 0.01
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size) * 0.01
b2 = np.zeros((1, output_size))

# ------------------------ Training Loop ------------------------ #
epochs = 1000
lr = 0.1

for epoch in range(epochs):
    # --------- Forward Pass ---------
    z1 = np.dot(X_train, W1) + b1
    a1 = relu(z1)
    z2 = np.dot(a1, W2) + b2
    output = softmax(z2)

    # --------- Loss ---------
    loss = cross_entropy(output, y_train)

    # --------- Backpropagation ---------
    error_output = output - y_train  # derivative of cross-entropy w.r.t. softmax
    dW2 = np.dot(a1.T, error_output)
    db2 = np.sum(error_output, axis=0, keepdims=True)

    error_hidden = np.dot(error_output, W2.T) * relu_derivative(z1)
    dW1 = np.dot(X_train.T, error_hidden)
    db1 = np.sum(error_hidden, axis=0, keepdims=True)

    # --------- Update Parameters ---------
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    if epoch % 100 == 0:
        print(f"Epoch {epoch} | Loss: {loss:.4f}")

# ------------------------ Evaluation ------------------------ #
z1_test = np.dot(X_test, W1) + b1
a1_test = relu(z1_test)
z2_test = np.dot(a1_test, W2) + b2
output_test = softmax(z2_test)

predicted_classes = np.argmax(output_test, axis=1)
true_classes = np.argmax(y_test, axis=1)

accuracy = np.mean(predicted_classes == true_classes)
print(f"\nTest Accuracy: {accuracy * 100:.2f}%")


Epoch 0 | Loss: 1.0994
Epoch 100 | Loss: 1.3266
Epoch 200 | Loss: 1.3266
Epoch 300 | Loss: 1.3266
Epoch 400 | Loss: 1.3266
Epoch 500 | Loss: 1.3266
Epoch 600 | Loss: 1.3266
Epoch 700 | Loss: 1.3266
Epoch 800 | Loss: 1.3266
Epoch 900 | Loss: 1.3266

Test Accuracy: 66.67%
