In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [3]:

# activation and loss functions

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    The textbook expression overflows in ``exp(-z)`` for large negative
    ``z`` and emits a RuntimeWarning. This version uses the identity
    ``sigmoid(z) = exp(-log(1 + exp(-z)))`` and lets ``np.logaddexp``
    evaluate the inner logarithm stably.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid with the same shape as ``z``.
    """
    # np.logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)),
    # computed so that the intermediate exponential never overflows.
    return np.exp(-np.logaddexp(0, np.negative(z)))

def compute_loss(y, y_pred):
    """Mean binary cross-entropy between labels ``y`` and probabilities ``y_pred``.

    A small constant is added inside each logarithm so that predictions of
    exactly 0 or 1 do not produce ``log(0)``.
    """
    tiny = 1e-8
    positive_term = y * np.log(y_pred + tiny)
    negative_term = (1 - y) * np.log(1 - y_pred + tiny)
    return -np.mean(positive_term + negative_term)


In [4]:
#Dataset


# Load the scikit-learn breast-cancer dataset as a feature matrix and labels.
data = load_breast_cancer()
X = data.data
# Column vector so labels line up with the (n, 1) predictions from X @ w.
y = data.target.reshape(-1, 1)

# Fixed random_state keeps the train/test split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Fit the scaler on the training split only, then apply the same statistics
# to the test split so both live in the same feature space without leaking
# test information into the fit.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Shared notebook-level settings read by the training cells below.
n, d = X_train.shape
epochs = 1000
lr = 0.01

In [5]:

# gradient descent

def train_gd(X, y, n_epochs=None, learning_rate=None):
    """Fit logistic regression with full-batch gradient descent.

    Shapes are taken from ``X`` itself rather than from notebook globals, so
    the function works for any dataset, not only ``X_train``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Target labels; reshaped internally to a column vector.
    n_epochs : int, optional
        Number of full-batch updates. Defaults to the notebook-level ``epochs``.
    learning_rate : float, optional
        Step size. Defaults to the notebook-level ``lr``.

    Returns
    -------
    float
        ``compute_loss`` evaluated on ``(X, y)`` after the last update.

    Raises
    ------
    ValueError
        If ``X`` is empty or ``X`` and ``y`` disagree on the number of rows.
    """
    X = np.asarray(X, dtype=float)
    # A 1-D y would broadcast against (n, 1) predictions into an (n, n) matrix.
    y = np.asarray(y).reshape(-1, 1)
    n_samples, n_features = X.shape
    if n_samples == 0 or y.shape[0] != n_samples:
        raise ValueError("X and y must be non-empty and have the same number of rows")

    if n_epochs is None:
        n_epochs = epochs
    if learning_rate is None:
        learning_rate = lr

    w = np.zeros((n_features, 1))
    b = 0.0
    # Loop-invariant: the averaged, transposed design matrix.
    Xt_over_n = (1 / n_samples) * X.T

    for _ in range(n_epochs):
        err = sigmoid(X @ w + b) - y

        dw = Xt_over_n @ err
        db = np.mean(err)

        w -= learning_rate * dw
        b -= learning_rate * db

    return compute_loss(y, sigmoid(X @ w + b))

In [6]:
#momentum

def train_momentum(X, y, beta=0.9, n_epochs=None, learning_rate=None):
    """Fit logistic regression with gradient descent plus momentum.

    The velocity is an exponential moving average of the gradient,
    ``v = beta * v + (1 - beta) * grad``, and parameters step along ``v``.
    Shapes are taken from ``X`` itself rather than from notebook globals.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Target labels; reshaped internally to a column vector.
    beta : float, default 0.9
        Decay factor of the gradient moving average.
    n_epochs : int, optional
        Number of full-batch updates. Defaults to the notebook-level ``epochs``.
    learning_rate : float, optional
        Step size. Defaults to the notebook-level ``lr``.

    Returns
    -------
    float
        ``compute_loss`` evaluated on ``(X, y)`` after the last update.

    Raises
    ------
    ValueError
        If ``X`` is empty or ``X`` and ``y`` disagree on the number of rows.
    """
    X = np.asarray(X, dtype=float)
    # A 1-D y would broadcast against (n, 1) predictions into an (n, n) matrix.
    y = np.asarray(y).reshape(-1, 1)
    n_samples, n_features = X.shape
    if n_samples == 0 or y.shape[0] != n_samples:
        raise ValueError("X and y must be non-empty and have the same number of rows")

    if n_epochs is None:
        n_epochs = epochs
    if learning_rate is None:
        learning_rate = lr

    w = np.zeros((n_features, 1))
    b = 0.0
    vw = np.zeros_like(w)
    vb = 0.0
    # Loop-invariant: the averaged, transposed design matrix.
    Xt_over_n = (1 / n_samples) * X.T

    for _ in range(n_epochs):
        err = sigmoid(X @ w + b) - y

        dw = Xt_over_n @ err
        db = np.mean(err)

        # Exponential moving average of the gradients.
        vw = beta * vw + (1 - beta) * dw
        vb = beta * vb + (1 - beta) * db

        w -= learning_rate * vw
        b -= learning_rate * vb

    return compute_loss(y, sigmoid(X @ w + b))


In [7]:

#RMSProp

def train_rmsprop(X, y, beta=0.9, n_epochs=None, learning_rate=None, eps=1e-8):
    """Fit logistic regression with RMSProp.

    Each gradient component is divided by the square root of an exponential
    moving average of its squared values before the step is taken.
    Shapes are taken from ``X`` itself rather than from notebook globals.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Target labels; reshaped internally to a column vector.
    beta : float, default 0.9
        Decay factor of the squared-gradient moving average.
    n_epochs : int, optional
        Number of full-batch updates. Defaults to the notebook-level ``epochs``.
    learning_rate : float, optional
        Step size. Defaults to the notebook-level ``lr``.
    eps : float, default 1e-8
        Constant added to the denominator to avoid division by zero.

    Returns
    -------
    float
        ``compute_loss`` evaluated on ``(X, y)`` after the last update.

    Raises
    ------
    ValueError
        If ``X`` is empty or ``X`` and ``y`` disagree on the number of rows.
    """
    X = np.asarray(X, dtype=float)
    # A 1-D y would broadcast against (n, 1) predictions into an (n, n) matrix.
    y = np.asarray(y).reshape(-1, 1)
    n_samples, n_features = X.shape
    if n_samples == 0 or y.shape[0] != n_samples:
        raise ValueError("X and y must be non-empty and have the same number of rows")

    if n_epochs is None:
        n_epochs = epochs
    if learning_rate is None:
        learning_rate = lr

    w = np.zeros((n_features, 1))
    b = 0.0
    sw = np.zeros_like(w)
    sb = 0.0
    # Loop-invariant: the averaged, transposed design matrix.
    Xt_over_n = (1 / n_samples) * X.T

    for _ in range(n_epochs):
        err = sigmoid(X @ w + b) - y

        dw = Xt_over_n @ err
        db = np.mean(err)

        # Running average of squared gradients.
        sw = beta * sw + (1 - beta) * (dw ** 2)
        sb = beta * sb + (1 - beta) * (db ** 2)

        w -= learning_rate * dw / (np.sqrt(sw) + eps)
        b -= learning_rate * db / (np.sqrt(sb) + eps)

    return compute_loss(y, sigmoid(X @ w + b))

In [8]:

#Adam


def train_adam(X, y, beta1=0.9, beta2=0.999, n_epochs=None, learning_rate=None, eps=1e-8):
    """Fit logistic regression with Adam.

    Keeps exponential moving averages of the gradient (first moment) and of
    the squared gradient (second moment), applies bias correction to both,
    and steps along their ratio.
    Shapes are taken from ``X`` itself rather than from notebook globals.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Target labels; reshaped internally to a column vector.
    beta1 : float, default 0.9
        Decay factor of the first-moment average.
    beta2 : float, default 0.999
        Decay factor of the second-moment average.
    n_epochs : int, optional
        Number of full-batch updates. Defaults to the notebook-level ``epochs``.
    learning_rate : float, optional
        Step size. Defaults to the notebook-level ``lr``.
    eps : float, default 1e-8
        Constant added to the denominator to avoid division by zero.

    Returns
    -------
    float
        ``compute_loss`` evaluated on ``(X, y)`` after the last update.

    Raises
    ------
    ValueError
        If ``X`` is empty or ``X`` and ``y`` disagree on the number of rows.
    """
    X = np.asarray(X, dtype=float)
    # A 1-D y would broadcast against (n, 1) predictions into an (n, n) matrix.
    y = np.asarray(y).reshape(-1, 1)
    n_samples, n_features = X.shape
    if n_samples == 0 or y.shape[0] != n_samples:
        raise ValueError("X and y must be non-empty and have the same number of rows")

    if n_epochs is None:
        n_epochs = epochs
    if learning_rate is None:
        learning_rate = lr

    w = np.zeros((n_features, 1))
    b = 0.0
    mw = np.zeros_like(w)
    vw = np.zeros_like(w)
    mb = 0.0
    vb = 0.0
    # Loop-invariant: the averaged, transposed design matrix.
    Xt_over_n = (1 / n_samples) * X.T

    # t starts at 1 so the bias-correction denominators are never zero.
    for t in range(1, n_epochs + 1):
        err = sigmoid(X @ w + b) - y

        dw = Xt_over_n @ err
        db = np.mean(err)

        # First moment
        mw = beta1 * mw + (1 - beta1) * dw
        mb = beta1 * mb + (1 - beta1) * db

        # Second moment
        vw = beta2 * vw + (1 - beta2) * (dw ** 2)
        vb = beta2 * vb + (1 - beta2) * (db ** 2)

        # Bias correction: both averages start at zero and are scaled up
        # by 1 / (1 - beta**t) to compensate.
        m_scale = 1 - beta1 ** t
        v_scale = 1 - beta2 ** t
        mw_hat = mw / m_scale
        mb_hat = mb / m_scale
        vw_hat = vw / v_scale
        vb_hat = vb / v_scale

        w -= learning_rate * mw_hat / (np.sqrt(vw_hat) + eps)
        b -= learning_rate * mb_hat / (np.sqrt(vb_hat) + eps)

    return compute_loss(y, sigmoid(X @ w + b))

In [9]:
print("Final Loss Comparison\n")

# Train each optimizer on the same training split and report its final loss.
for label, trainer in (
    ("Gradient Descent :", train_gd),
    ("Momentum         :", train_momentum),
    ("RMSProp          :", train_rmsprop),
    ("Adam             :", train_adam),
):
    print(label, trainer(X_train, y_train))

Final Loss Comparison

Gradient Descent : 0.10978535733799671
Momentum         : 0.10930729549774057
RMSProp          : 0.0196511206922355
Adam             : 0.04885187323045883
