In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Seed NumPy's global random generator so the np.random.randn weight draws in
# NeuralNetwork.__init__ are the same on every run.
np.random.seed(42)  # for reproducibility

# Activation functions and their derivatives
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp(0, -x)`` for the inner logarithm. This is mathematically
    the same function, but it never evaluates ``exp`` on a large positive
    argument, so very negative inputs no longer trigger an overflow warning.

    Args:
        x: Scalar or array-like of pre-activation values.

    Returns:
        NumPy float scalar or array with values in ``[0, 1]``.
    """
    # log(1 + exp(-x)) == logaddexp(0, -x), evaluated without overflow.
    return np.exp(-np.logaddexp(0, np.negative(x)))

def sigmoid_derivative(x):
    """Return the derivative of the sigmoid evaluated at pre-activation ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.
    """
    activated = sigmoid(x)
    complement = 1 - activated
    return activated * complement

def tanh(x):
    """Hyperbolic tangent activation, applied element-wise by NumPy."""
    squashed = np.tanh(x)
    return squashed

def tanh_derivative(x):
    """Return the derivative of tanh at pre-activation ``x``: ``1 - tanh(x)**2``."""
    squashed = np.tanh(x)
    return 1 - squashed ** 2

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return the ReLU derivative at ``x``: 1.0 where ``x > 0``, else 0.0.

    The comparison goes through ``np.greater`` rather than the ``>`` operator
    so that plain Python scalars and lists are accepted as well as arrays;
    a bare Python ``bool`` has no ``astype`` method.

    Args:
        x: Scalar or array-like of pre-activation values.

    Returns:
        NumPy float scalar or array of 0.0 / 1.0 with the same shape as ``x``.
    """
    return np.greater(x, 0).astype(float)

# Neural Network class
class NeuralNetwork:
    """Two-layer fully connected classifier trained with batch gradient descent.

    The network is ``input -> hidden (chosen activation) -> softmax output``.
    ``forward`` caches its intermediate values on the instance, and
    ``backward`` reads them, so ``backward`` must follow a ``forward`` call on
    the same batch.
    """

    def __init__(self, input_size, hidden_size, output_size, activation='sigmoid'):
        """Draw initial weights and select the hidden-layer activation.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            activation: One of ``'sigmoid'``, ``'tanh'`` or ``'relu'``.

        Raises:
            ValueError: If ``activation`` is not one of the supported names.
        """
        # Weights come from the global NumPy RNG (standard normal); biases are zero.
        self.W1 = np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

        if activation == 'sigmoid':
            self.activation = sigmoid
            self.activation_deriv = sigmoid_derivative
        elif activation == 'tanh':
            self.activation = tanh
            self.activation_deriv = tanh_derivative
        elif activation == 'relu':
            self.activation = relu
            self.activation_deriv = relu_derivative
        else:
            raise ValueError("Unsupported activation")

    def forward(self, X):
        """Run a forward pass and return the softmax class probabilities.

        Stores ``Z1``, ``A1``, ``Z2`` and ``A2`` on the instance for use by
        ``backward``.
        """
        self.Z1 = X @ self.W1 + self.b1
        self.A1 = self.activation(self.Z1)
        self.Z2 = self.A1 @ self.W2 + self.b2
        self.A2 = self.softmax(self.Z2)
        return self.A2

    def softmax(self, x):
        """Return the row-wise softmax of a 2-D array of logits."""
        # Subtracting each row's maximum leaves the result unchanged but keeps
        # the exponentials from overflowing.
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def backward(self, X, y, output, lr):
        """Apply one gradient-descent update to all weights and biases.

        Args:
            X: Input batch that was passed to the preceding ``forward`` call.
            y: One-hot targets, one row per sample.
            output: Probabilities returned by ``forward`` for ``X``.
            lr: Learning rate.
        """
        m = y.shape[0]
        # Gradient of the mean cross-entropy w.r.t. the softmax logits.
        dZ2 = output - y
        dW2 = self.A1.T @ dZ2 / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        # Propagate through the output weights, then the hidden activation.
        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * self.activation_deriv(self.Z1)
        dW1 = X.T @ dZ1 / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        self.W1 -= lr * dW1
        self.b1 -= lr * db1
        self.W2 -= lr * dW2
        self.b2 -= lr * db2

    def train(self, X, y, epochs=1000, lr=0.1, log_every=100):
        """Train on the full batch for ``epochs`` iterations.

        Args:
            X: Training inputs, one row per sample.
            y: One-hot training targets.
            epochs: Number of full-batch gradient steps.
            lr: Learning rate.
            log_every: Print the loss every this many epochs (starting at
                epoch 0). A falsy value (``0`` or ``None``) disables printing.

        Returns:
            List of per-epoch cross-entropy losses (Python floats), each
            measured before that epoch's weight update.
        """
        history = []
        for epoch in range(epochs):
            output = self.forward(X)
            # The 1e-9 offset guards against log(0) for saturated probabilities.
            loss = -np.mean(np.sum(y * np.log(output + 1e-9), axis=1))
            history.append(float(loss))
            self.backward(X, y, output, lr)
            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
        return history

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        output = self.forward(X)
        return np.argmax(output, axis=1)

# Load and preprocess Iris dataset
iris = load_iris()
X = iris.data
y = iris.target.reshape(-1, 1)

# One-hot encode labels
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y)

# Train-test split on the raw features. Splitting happens BEFORE scaling so
# that the scaler's statistics are computed from the training rows only.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42
)

# Scale features: fit on the training rows, then apply the same transform to
# the test rows. Fitting on the full dataset would leak test-set statistics
# into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full feature matrix under the train-fitted scaling, kept under its original
# name in case later cells refer to it.
X_scaled = scaler.transform(X)

# Layer sizes follow the data instead of being hard-coded.
n_features = X_train.shape[1]
n_classes = y_train.shape[1]

# Train with different activation functions
for act in ['sigmoid', 'tanh', 'relu']:
    print(f"\nTraining with activation function: {act.upper()}")
    nn = NeuralNetwork(input_size=n_features, hidden_size=10, output_size=n_classes, activation=act)
    nn.train(X_train, y_train, epochs=1000, lr=0.1)
    predictions = nn.predict(X_test)
    accuracy = np.mean(np.argmax(y_test, axis=1) == predictions)
    print(f"Accuracy with {act.upper()}: {accuracy * 100:.2f}%")


Training with activation function: SIGMOID
Epoch 0, Loss: 1.2620
Epoch 100, Loss: 0.3924
Epoch 200, Loss: 0.3132
Epoch 300, Loss: 0.2707
Epoch 400, Loss: 0.2398
Epoch 500, Loss: 0.2147
Epoch 600, Loss: 0.1935
Epoch 700, Loss: 0.1755
Epoch 800, Loss: 0.1603
Epoch 900, Loss: 0.1476
Accuracy with SIGMOID: 100.00%

Training with activation function: TANH
Epoch 0, Loss: 3.0807
Epoch 100, Loss: 0.1622
Epoch 200, Loss: 0.1147
Epoch 300, Loss: 0.0942
Epoch 400, Loss: 0.0827
Epoch 500, Loss: 0.0753
Epoch 600, Loss: 0.0701
Epoch 700, Loss: 0.0663
Epoch 800, Loss: 0.0634
Epoch 900, Loss: 0.0611
Accuracy with TANH: 100.00%

Training with activation function: RELU
Epoch 0, Loss: 1.3143
Epoch 100, Loss: 0.2075
Epoch 200, Loss: 0.1396
Epoch 300, Loss: 0.1072
Epoch 400, Loss: 0.0877
Epoch 500, Loss: 0.0756
Epoch 600, Loss: 0.0684
Epoch 700, Loss: 0.0628
Epoch 800, Loss: 0.0584
Epoch 900, Loss: 0.0548
Accuracy with RELU: 97.78%
