<a href="https://colab.research.google.com/github/Sreys10/ann/blob/main/07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
# Activation functions
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def softmax(x):
    exp_vals = np.exp(x - np.max(x, axis=1, keepdims=True))  # stability
    return exp_vals / np.sum(exp_vals, axis=1, keepdims=True)

In [3]:
# Loss function
def cross_entropy(y_true, y_pred):
    n = y_true.shape[0]
    return -np.sum(y_true * np.log(y_pred + 1e-9)) / n

In [4]:
def accuracy(y_true, y_pred):
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

In [5]:
# Load Iris dataset
iris = load_iris()
X = iris.data[:, :3]  # Use only first 3 features
y = iris.target.reshape(-1, 1)

In [6]:
# One-hot encode the labels
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y)

In [7]:
# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [8]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3, random_state=42)

In [9]:

# Initialize architecture
np.random.seed(42)
n_input = X.shape[1]         # 3 features
n_hidden = 5
n_output = 3                 # 3 classes

In [10]:
# Weights and biases
W1 = np.random.randn(n_input, n_hidden) * 0.1
b1 = np.zeros((1, n_hidden))
W2 = np.random.randn(n_hidden, n_output) * 0.1
b2 = np.zeros((1, n_output))

In [15]:

# Training loop
epochs = 100
lr = 0.05

In [16]:
for epoch in range(epochs):
    # Forward pass
    z1 = np.dot(X_train, W1) + b1
    a1 = relu(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = softmax(z2)

    # Loss and accuracy
    loss = cross_entropy(y_train, a2)
    acc = accuracy(y_train, a2)

    # Backpropagation
    dz2 = a2 - y_train
    dW2 = np.dot(a1.T, dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)

    dz1 = np.dot(dz2, W2.T) * relu_derivative(a1)
    dW1 = np.dot(X_train.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)

    # Gradient descent update
    W2 -= lr * dW2
    b2 -= lr * db2
    W1 -= lr * dW1
    b1 -= lr * db1

    if epoch % 1 == 0:
        print(f"Epoch {epoch}: Loss = {loss:.4f}, Accuracy = {acc:.4f}")


Epoch 0: Loss = 0.0929, Accuracy = 0.9619
Epoch 1: Loss = 0.0929, Accuracy = 0.9619
Epoch 2: Loss = 0.0929, Accuracy = 0.9619
Epoch 3: Loss = 0.0929, Accuracy = 0.9619
Epoch 4: Loss = 0.0929, Accuracy = 0.9619
Epoch 5: Loss = 0.0929, Accuracy = 0.9619
Epoch 6: Loss = 0.0929, Accuracy = 0.9619
Epoch 7: Loss = 0.0929, Accuracy = 0.9619
Epoch 8: Loss = 0.0929, Accuracy = 0.9619
Epoch 9: Loss = 0.0929, Accuracy = 0.9619
Epoch 10: Loss = 0.0929, Accuracy = 0.9619
Epoch 11: Loss = 0.0929, Accuracy = 0.9619
Epoch 12: Loss = 0.0929, Accuracy = 0.9619
Epoch 13: Loss = 0.0929, Accuracy = 0.9619
Epoch 14: Loss = 0.0929, Accuracy = 0.9619
Epoch 15: Loss = 0.0929, Accuracy = 0.9619
Epoch 16: Loss = 0.0929, Accuracy = 0.9619
Epoch 17: Loss = 0.0929, Accuracy = 0.9619
Epoch 18: Loss = 0.0929, Accuracy = 0.9619
Epoch 19: Loss = 0.0929, Accuracy = 0.9619
Epoch 20: Loss = 0.0929, Accuracy = 0.9619
Epoch 21: Loss = 0.0929, Accuracy = 0.9619
Epoch 22: Loss = 0.0929, Accuracy = 0.9619
Epoch 23: Loss = 0.09

In [17]:
# Test accuracy
z1_test = np.dot(X_test, W1) + b1
a1_test = relu(z1_test)
z2_test = np.dot(a1_test, W2) + b2
a2_test = softmax(z2_test)

In [18]:
print("\nTest Accuracy:", accuracy(y_test, a2_test))



Test Accuracy: 0.9777777777777777
