In [13]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [14]:
iris = load_iris()
X = iris.data
y = iris.target

In [18]:
# Convert labels to one-hot encoding
y_onehot = np.zeros((y.size, y.max() + 1))
y_onehot[np.arange(y.size), y] = 1

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_onehot, test_size=0.2, random_state=42)

In [19]:
#Define the activation functions and their derivatives
# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

In [20]:
def cross_entropy_loss(y_true, y_pred):
    return -np.mean(np.sum(y_true * np.log(y_pred + 1e-9), axis=1))

In [21]:
# Initialize weights and biases for a 3-layer neural network
input_size = X_train.shape[1]
hidden_size = 5
output_size = y_train.shape[1]

# Initialize weights and biases
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

In [22]:
#Define forward propagation
def forward(X):
    global A1, Z1, A2, Z2
    Z1 = np.dot(X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = softmax(Z2)
    return A2

In [23]:
#Define backward propagation
def backward(X, y, learning_rate):
    global W1, b1, W2, b2
    m = y.shape[0]
    
    # Compute gradients
    dZ2 = A2 - y
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * sigmoid_derivative(Z1)
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m
    
    # Update weights and biases
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

In [24]:
epochs = 1000
learning_rate = 0.01

for epoch in range(epochs):
    A2 = forward(X_train)
    loss = cross_entropy_loss(y_train, A2)
    backward(X_train, y_train, learning_rate)
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')

Epoch 0, Loss: 2.1039450997708835
Epoch 100, Loss: 1.340638268054594
Epoch 200, Loss: 1.0543186858641334
Epoch 300, Loss: 0.9170783138446945
Epoch 400, Loss: 0.8249687729694227
Epoch 500, Loss: 0.7564097241261364
Epoch 600, Loss: 0.7033927084612953
Epoch 700, Loss: 0.661371673798101
Epoch 800, Loss: 0.6273460087042593
Epoch 900, Loss: 0.5992491560062295


In [25]:
def predict(X):
    A2 = forward(X)
    return np.argmax(A2, axis=1)

y_pred = predict(X_test)
y_true = np.argmax(y_test, axis=1)
accuracy = np.mean(y_pred == y_true)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 90.00%
