# ECG Neural Network Training with Different Activations and Optimizers

In [None]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml

# Load ECG Dataset
ecg_data = fetch_openml(name="ECG5000", version=1, as_frame=False)
X, y = ecg_data['data'], ecg_data['target'].astype(int)

# Binary classification: class 1 vs others (column vector to match the network output)
y = (y == 1).astype(int).reshape(-1, 1)

# Split into train/test BEFORE scaling so that no test-set statistics
# (mean / std) leak into the features the model is trained on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize input features using statistics from the training rows only,
# then apply the same transform everywhere.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(X)

In [None]:
# Activation Functions
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    Evaluated through exp(-|z|), whose argument is never positive, so inputs
    of large magnitude cannot overflow the exponential.
    """
    decay = np.exp(-np.abs(z))  # never exceeds 1
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    return np.where(z >= 0, 1.0, decay) / (1.0 + decay)
def dsigmoid(a):
    """Sigmoid derivative written in terms of the sigmoid output ``a``."""
    complement = 1 - a
    return a * complement

def tanh(z):
    """Hyperbolic tangent activation, applied element-wise."""
    activated = np.tanh(z)
    return activated
def dtanh(a):
    """Tanh derivative written in terms of the tanh output ``a``: 1 - a^2."""
    a_squared = np.power(a, 2)
    return 1 - a_squared

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    floor = 0
    return np.maximum(floor, z)
def drelu(a):
    """ReLU derivative: 1.0 where the value is strictly positive, else 0.0."""
    is_active = a > 0
    return is_active.astype(float)

def leaky_relu(z, alpha=0.01):
    """Leaky ReLU: ``z`` where positive, otherwise ``z`` scaled by ``alpha``."""
    leaked = z * alpha
    return np.where(z > 0, z, leaked)
def dleaky_relu(a, alpha=0.01):
    """Leaky ReLU derivative: 1 where the value is positive, else ``alpha``."""
    on_positive_side = a > 0
    return np.where(on_positive_side, 1, alpha)

def elu(z, alpha=1):
    """Exponential linear unit: ``z`` if z > 0, else ``alpha * (exp(z) - 1)``.

    ``np.where`` evaluates both branches for every element, so the exponent
    is clamped to <= 0 first; otherwise large positive inputs overflow in a
    branch whose result is discarded anyway. ``expm1`` also keeps precision
    for inputs close to zero.
    """
    negative_branch = alpha * np.expm1(np.minimum(z, 0))
    return np.where(z > 0, z, negative_branch)
def delu(a, alpha=1):
    """ELU derivative in terms of the ELU output ``a``.

    On the negative side a = alpha * (exp(z) - 1), so the slope
    alpha * exp(z) equals ``a + alpha``.
    """
    negative_slope = a + alpha
    return np.where(a > 0, 1, negative_slope)

def prelu(z, alpha=0.25):
    """Parametric ReLU with a fixed slope ``alpha`` on the negative side."""
    scaled = alpha * z
    return np.where(z > 0, z, scaled)
def dprelu(a, alpha=0.25):
    """PReLU derivative: 1 where the value is positive, else ``alpha``."""
    on_positive_side = a > 0
    return np.where(on_positive_side, 1, alpha)

def selu(z, alpha=1.67326, scale=1.0507):
    """Scaled ELU: ``scale * z`` if z > 0, else ``scale * alpha * (exp(z) - 1)``.

    ``np.where`` evaluates both branches for every element, so the exponent
    is clamped to <= 0 first; otherwise large positive inputs overflow in a
    branch whose result is discarded anyway. ``expm1`` also keeps precision
    for inputs close to zero.
    """
    negative_branch = alpha * np.expm1(np.minimum(z, 0))
    return scale * np.where(z > 0, z, negative_branch)
def dselu(a, alpha=1.67326, scale=1.0507):
    """SELU derivative in terms of the SELU *output* ``a``.

    Like the other derivative helpers in this file, the argument is the
    activation value, not the pre-activation z. On the negative side
    a = scale * alpha * (exp(z) - 1), hence the slope scale * alpha * exp(z)
    equals ``a + scale * alpha``. Exponentiating ``a`` itself would be wrong.
    """
    negative_slope = a + scale * alpha
    return np.where(a > 0, scale, negative_slope)

# Lookup table: activation name -> (activation, derivative-in-terms-of-output).
activations = dict(
    sigmoid=(sigmoid, dsigmoid),
    tanh=(tanh, dtanh),
    relu=(relu, drelu),
    leaky_relu=(leaky_relu, dleaky_relu),
    elu=(elu, delu),
    prelu=(prelu, dprelu),
    selu=(selu, dselu),
)

In [None]:
# Loss Function
def binary_cross_entropy(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Predictions are clipped to [1e-9, 1 - 1e-9] so neither logarithm
    receives an exact 0.
    """
    eps = 1e-9
    probs = np.clip(y_pred, eps, 1 - eps)
    positive_term = y_true * np.log(probs)
    negative_term = (1 - y_true) * np.log(1 - probs)
    return -np.mean(positive_term + negative_term)

def dbce(y_true, y_pred):
    """Per-sample derivative of binary cross-entropy w.r.t. the prediction.

    The 1e-9 added to the denominator guards against division by zero when
    a prediction is exactly 0 or 1.
    """
    residual = y_pred - y_true
    denominator = y_pred * (1 - y_pred) + 1e-9
    return residual / denominator

In [None]:
# Optimizers
def gradient_descent(params, grads, lr):
    """Return new parameters after one step of size ``lr`` against ``grads``."""
    stepped = []
    for param, grad in zip(params, grads):
        stepped.append(param - lr * grad)
    return stepped

def stochastic_gradient_descent(params, grads, lr):
    """Return new parameters after one step of size ``lr`` against ``grads``.

    The update rule is the plain gradient step; the function itself does not
    sample or batch anything.
    """
    def step(param, grad):
        return param - lr * grad

    return list(map(step, params, grads))

def mini_batch_sgd(params, grads, lr):
    """Return new parameters after one step of size ``lr`` against ``grads``.

    The update rule is the plain gradient step; the function itself does not
    sample or batch anything.
    """
    scaled_grads = (lr * grad for grad in grads)
    return [param - delta for param, delta in zip(params, scaled_grads)]

In [None]:
# Neural Network Class
class SimpleANN:
    def __init__(self, input_dim, hidden_dim, activation_name, optimizer_name, lr=0.1):
        self.activation_name = activation_name
        self.optimizer_name = optimizer_name
        self.lr = lr
        self.act, self.dact = activations[activation_name]
        self.W1 = np.random.randn(input_dim, hidden_dim) * 0.1
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = np.random.randn(hidden_dim, 1) * 0.1
        self.b2 = np.zeros((1, 1))

    def forward(self, X):
        self.Z1 = X @ self.W1 + self.b1
        self.A1 = self.act(self.Z1)
        self.Z2 = self.A1 @ self.W2 + self.b2
        self.A2 = sigmoid(self.Z2)
        return self.A2

    def backward(self, X, y, output):
        dA2 = dbce(y, output)
        dZ2 = dA2 * dsigmoid(output)
        dW2 = self.A1.T @ dZ2
        db2 = np.sum(dZ2, axis=0, keepdims=True)
        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * self.dact(self.A1)
        dW1 = X.T @ dZ1
        db1 = np.sum(dZ1, axis=0, keepdims=True)
        return [dW1, db1, dW2, db2]

    def update(self, grads):
        params = [self.W1, self.b1, self.W2, self.b2]
        if self.optimizer_name == 'gd':
            updated = gradient_descent(params, grads, self.lr)
        elif self.optimizer_name == 'sgd':
            updated = stochastic_gradient_descent(params, grads, self.lr)
        elif self.optimizer_name == 'mini-batch':
            updated = mini_batch_sgd(params, grads, self.lr)
        self.W1, self.b1, self.W2, self.b2 = updated

    def train(self, X, y, epochs=100, batch_size=None):
        loss_list = []
        for epoch in range(epochs):
            if batch_size:
                indices = np.random.permutation(len(X))
                X, y = X[indices], y[indices]
                for i in range(0, len(X), batch_size):
                    X_batch = X[i:i+batch_size]
                    y_batch = y[i:i+batch_size]
                    output = self.forward(X_batch)
                    grads = self.backward(X_batch, y_batch, output)
                    self.update(grads)
            else:
                output = self.forward(X)
                grads = self.backward(X, y, output)
                self.update(grads)
            y_pred = self.forward(X)
            loss = binary_cross_entropy(y, y_pred)
            loss_list.append(loss)
        return loss_list

    def predict(self, X):
        probs = self.forward(X)
        return (probs > 0.5).astype(int)

In [None]:
# Run Experiments
combinations = [
    ('sigmoid', 'gd'),
    ('relu', 'gd'),
    ('tanh', 'sgd'),
    ('leaky_relu', 'mini-batch'),
    ('prelu', 'mini-batch'),
    ('elu', 'sgd'),
    ('selu', 'gd')
]

# Batch size implied by each optimizer name. The three optimizer functions
# share the same update rule, so the batch size is what actually tells them
# apart: full batch for 'gd', one sample per step for 'sgd', 32 for 'mini-batch'.
batch_sizes = {'gd': None, 'sgd': 1, 'mini-batch': 32}

results = {}

# Fix NumPy's global seed so weight initialisation and shuffling are
# repeatable between runs (the data split above is already seeded).
np.random.seed(42)

for act_name, opt_name in combinations:
    print(f"Training with {act_name.upper()} + {opt_name.upper()}...")
    model = SimpleANN(
        input_dim=X_train.shape[1],
        hidden_dim=10,
        activation_name=act_name,
        optimizer_name=opt_name,
        lr=0.1,
    )
    loss_curve = model.train(X_train, y_train, epochs=100, batch_size=batch_sizes[opt_name])
    y_pred = model.predict(X_test)
    acc = np.mean(y_pred == y_test)  # fraction of correct test predictions
    results[f"{act_name}_{opt_name}"] = (loss_curve, acc)

# Plot Individual Loss Curves and Global Minimum
for name, (losses, acc) in results.items():
    # Locate the lowest recorded loss so it can be highlighted on the curve.
    best_epoch = np.argmin(losses)
    lowest_loss = min(losses)

    plt.figure(figsize=(6, 4))
    plt.plot(losses, label=f"{name} (Acc: {acc:.2f})")
    plt.scatter(best_epoch, lowest_loss, color='red', label=f"Min Loss = {lowest_loss:.4f}")

    plt.title(f"Loss Curve - {name}")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.show()