In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

np.random.seed(42)

# Activation functions
def relu(x): return np.maximum(0, x)
def relu_derivative(x): return (x > 0).astype(float)

def sigmoid(x): return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x): return sigmoid(x) * (1 - sigmoid(x))

def tanh(x): return np.tanh(x)
def tanh_derivative(x): return 1 - np.tanh(x)**2

# Deep ANN class
class DeepNeuralNetwork:
    def __init__(self, layer_sizes, activation='relu'):
        self.num_layers = len(layer_sizes) - 1
        self.activation_name = activation
        self.params = {}

        # Activation function
        if activation == 'relu':
            self.act = relu
            self.act_deriv = relu_derivative
            init_factor = lambda n: np.sqrt(2. / n)
        elif activation == 'sigmoid':
            self.act = sigmoid
            self.act_deriv = sigmoid_derivative
            init_factor = lambda n: np.sqrt(1. / n)
        elif activation == 'tanh':
            self.act = tanh
            self.act_deriv = tanh_derivative
            init_factor = lambda n: np.sqrt(1. / n)
        else:
            raise ValueError("Unsupported activation")

        # Initialize weights and biases
        for i in range(self.num_layers):
            n_in, n_out = layer_sizes[i], layer_sizes[i+1]
            self.params[f'W{i+1}'] = np.random.randn(n_in, n_out) * init_factor(n_in)
            self.params[f'b{i+1}'] = np.zeros((1, n_out))

    def softmax(self, x):
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def forward(self, X):
        self.cache = {'A0': X}
        for i in range(1, self.num_layers):
            Z = self.cache[f'A{i-1}'] @ self.params[f'W{i}'] + self.params[f'b{i}']
            A = self.act(Z)
            self.cache[f'Z{i}'] = Z
            self.cache[f'A{i}'] = A
        # Output layer (softmax)
        Z = self.cache[f'A{self.num_layers-1}'] @ self.params[f'W{self.num_layers}'] + self.params[f'b{self.num_layers}']
        A = self.softmax(Z)
        self.cache[f'Z{self.num_layers}'] = Z
        self.cache[f'A{self.num_layers}'] = A
        return A

    def backward(self, X, y, lr):
        m = X.shape[0]
        L = self.num_layers
        dZ = self.cache[f'A{L}'] - y

        for i in reversed(range(1, L + 1)):
            dW = self.cache[f'A{i-1}'].T @ dZ / m
            db = np.sum(dZ, axis=0, keepdims=True) / m

            self.params[f'W{i}'] -= lr * dW
            self.params[f'b{i}'] -= lr * db

            if i > 1:
                dA_prev = dZ @ self.params[f'W{i}'].T
                dZ = dA_prev * self.act_deriv(self.cache[f'Z{i-1}'])

    def train(self, X, y, epochs=1000, lr=0.1):
        for epoch in range(epochs):
            output = self.forward(X)
            loss = -np.mean(np.sum(y * np.log(output + 1e-9), axis=1))
            self.backward(X, y, lr)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)

In [None]:
# Load dataset
iris = load_iris()
X = iris.data
y = iris.target.reshape(-1, 1)

# One-hot encode labels
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42)

In [None]:
# Define a deep architecture: 4 hidden layers
layer_sizes = [4, 32, 64, 32, 16, 3]  # Input:4, Hidden:4 layers, Output:3

# Try different activations
for activation in ['relu', 'tanh', 'sigmoid']:
    print(f"\nTraining with activation: {activation.upper()}")
    model = DeepNeuralNetwork(layer_sizes, activation=activation)
    model.train(X_train, y_train, epochs=1000, lr=0.05)
    preds = model.predict(X_test)
    accuracy = np.mean(np.argmax(y_test, axis=1) == preds)
    print(f"Test Accuracy with {activation.upper()}: {accuracy * 100:.2f}%")



Training with activation: RELU
Epoch 0, Loss: 1.4050
Epoch 100, Loss: 0.1377
Epoch 200, Loss: 0.0835
Epoch 300, Loss: 0.0616
Epoch 400, Loss: 0.0457
Epoch 500, Loss: 0.0361
Epoch 600, Loss: 0.0289
Epoch 700, Loss: 0.0232
Epoch 800, Loss: 0.0184
Epoch 900, Loss: 0.0140
Test Accuracy with RELU: 100.00%

Training with activation: TANH
Epoch 0, Loss: 1.1415
Epoch 100, Loss: 0.2430
Epoch 200, Loss: 0.1286
Epoch 300, Loss: 0.0897
Epoch 400, Loss: 0.0714
Epoch 500, Loss: 0.0625
Epoch 600, Loss: 0.0572
Epoch 700, Loss: 0.0535
Epoch 800, Loss: 0.0505
Epoch 900, Loss: 0.0478
Test Accuracy with TANH: 97.78%

Training with activation: SIGMOID
Epoch 0, Loss: 1.1887
Epoch 100, Loss: 1.0961
Epoch 200, Loss: 1.0954
Epoch 300, Loss: 1.0947
Epoch 400, Loss: 1.0940
Epoch 500, Loss: 1.0933
Epoch 600, Loss: 1.0924
Epoch 700, Loss: 1.0915
Epoch 800, Loss: 1.0904
Epoch 900, Loss: 1.0892
Test Accuracy with SIGMOID: 35.56%
