In [1]:
import numpy as np

In [None]:
class LogisticRegression:
    """Binary classifier trained with mini-batch gradient descent.

    Despite the name, the model is a small feed-forward network: one ReLU
    hidden layer of ``hidden_dim`` units followed by a single sigmoid output
    unit, optimized against the binary cross-entropy loss.

    Parameters
    ----------
    input_dim : int
        Number of input features (columns of ``X``).
    hidden_dim : int
        Number of units in the hidden layer.
    lr : float, default 0.01
        Step size applied by :meth:`update`.
    seed : int or None, default 42
        Seed for the weight initialization. Two models built with the same
        seed and dimensions start from identical weights; ``None`` draws
        fresh entropy from the OS.

    Attributes
    ----------
    W1, b1 : ndarray of shape (input_dim, hidden_dim) and (1, hidden_dim)
        Hidden-layer weights and biases.
    W2, b2 : ndarray of shape (hidden_dim, 1) and (1, 1)
        Output-layer weights and bias.
    """

    def __init__(self, input_dim, hidden_dim, lr=0.01, seed=42):
        self.lr = lr

        # A private generator makes `seed` effective without touching NumPy's
        # global random state.
        rng = np.random.default_rng(seed)

        # Xavier-style initialization: standard-normal draws scaled by
        # 1 / sqrt(fan_in); biases start at zero.
        self.W1 = rng.standard_normal((input_dim, hidden_dim)) / np.sqrt(input_dim)
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = rng.standard_normal((hidden_dim, 1)) / np.sqrt(hidden_dim)
        self.b2 = np.zeros((1, 1))

    @staticmethod
    def _as_column(values):
        """Return ``values`` as a float ndarray of shape (n, 1)."""
        return np.asarray(values, dtype=float).reshape(-1, 1)

    def sigmoid(self, z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``.

        Evaluated through ``exp(-|z|)``, which always lies in (0, 1], so the
        exponential cannot overflow for inputs of large magnitude.
        """
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

    def sigmoid_deriv(self, a):
        """Derivative of the sigmoid expressed via its output ``a = sigmoid(z)``."""
        return a * (1 - a)

    def relu(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def relu_deriv(self, z):
        """Derivative of ReLU: 1 where ``z > 0``, otherwise 0."""
        return np.where(z > 0, 1, 0)

    def forward(self, X):
        """Run the network on a batch.

        Parameters
        ----------
        X : ndarray of shape (m, input_dim)

        Returns
        -------
        a2 : ndarray of shape (m, 1)
            Sigmoid output of the final unit for each row of ``X``.
        cache : dict
            The input and every intermediate (``'X'``, ``'z1'``, ``'a1'``,
            ``'z2'``, ``'a2'``), as consumed by :meth:`backward`.
        """
        z1 = X @ self.W1 + self.b1  # (m, hidden_dim)
        a1 = self.relu(z1)  # (m, hidden_dim)
        z2 = a1 @ self.W2 + self.b2  # (m, 1)
        a2 = self.sigmoid(z2)  # (m, 1)

        cache = {
            'X': X,
            'z1': z1,
            'a1': a1,
            'z2': z2,
            'a2': a2
        }
        return a2, cache

    def compute_loss(self, y, y_pred):
        """Mean binary cross-entropy between labels and predicted probabilities.

        Both arguments are flattened to column vectors first, so 1-D and
        (m, 1) inputs can be mixed without NumPy broadcasting them into an
        (m, m) matrix. Predictions are clipped to ``[1e-10, 1 - 1e-10]`` so
        the logarithms stay finite.
        """
        eps = 1e-10
        y = self._as_column(y)
        y_pred = np.clip(self._as_column(y_pred), eps, 1 - eps)
        loss = -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
        return loss

    def backward(self, X, y, cache):
        """Compute gradients of the mean cross-entropy loss for one batch.

        Parameters
        ----------
        X : ndarray of shape (m, input_dim)
            The batch that was passed to :meth:`forward`.
        y : array-like with m entries
            Binary labels; 1-D or (m, 1).
        cache : dict
            Second return value of :meth:`forward` for the same batch.

        Returns
        -------
        dict
            Gradients under the keys ``'dW1'``, ``'db1'``, ``'dW2'``,
            ``'db2'``, each shaped like the parameter it belongs to.
        """
        m = X.shape[0]
        z1, a1, a2 = cache['z1'], cache['a1'], cache['a2']
        y = self._as_column(y)

        # For a sigmoid output with cross-entropy loss, dL/da2 * da2/dz2
        # simplifies to (a2 - y). Using the simplified form avoids dividing
        # by a2 or (1 - a2), which is 0/0 or x/0 once the output saturates.
        dz2 = a2 - y  # (m, 1)
        dW2 = (1 / m) * a1.T @ dz2  # (hidden_dim, 1)
        db2 = (1 / m) * np.sum(dz2, axis=0, keepdims=True)  # (1, 1)

        da1 = dz2 @ self.W2.T  # (m, hidden_dim)
        dz1 = da1 * self.relu_deriv(z1)  # (m, hidden_dim)
        dW1 = (1 / m) * X.T @ dz1  # (input_dim, hidden_dim)
        db1 = (1 / m) * np.sum(dz1, axis=0, keepdims=True)  # (1, hidden_dim)

        grads = {
            'dW1': dW1,
            'db1': db1,
            'dW2': dW2,
            'db2': db2
        }
        return grads

    def update(self, grads):
        """Take one gradient-descent step, modifying the parameters in place."""
        self.W1 -= self.lr * grads['dW1']
        self.b1 -= self.lr * grads['db1']
        self.W2 -= self.lr * grads['dW2']
        self.b2 -= self.lr * grads['db2']

    def train(self, X, y, num_epochs=100, batch_size=32):
        """Fit the model with mini-batch gradient descent.

        Batches are consecutive slices of ``X`` / ``y`` in the order given
        (no shuffling). Every 10th epoch, starting with epoch 0, the loss on
        the full data set is printed.

        Returns
        -------
        ndarray or None
            Predictions for the last mini-batch processed, computed before
            that batch's parameter update; ``None`` if no batch was processed
            (``num_epochs == 0`` or empty ``X``).
        """
        # Defined up front so the final `return` is valid even when the
        # loops below never execute.
        y_pred = None

        for epoch in range(num_epochs):
            for i in range(0, X.shape[0], batch_size):
                X_batch = X[i:i+batch_size]
                y_batch = y[i:i+batch_size]

                y_pred, cache = self.forward(X_batch)
                grads = self.backward(X_batch, y_batch, cache)
                self.update(grads)

            if epoch % 10 == 0:
                y_pred_full, _ = self.forward(X)
                loss = self.compute_loss(y, y_pred_full)
                print(f"Epoch {epoch}, Loss: {loss}")

        return y_pred

    def predict(self, X):
        """Return hard 0/1 labels of shape (m, 1), thresholding at 0.5."""
        y_pred, _ = self.forward(X)
        return (y_pred >= 0.5).astype(int)

    def predict_proba(self, X):
        """Return the sigmoid output for each row of ``X``, shape (m, 1)."""
        y_pred, _ = self.forward(X)
        return y_pred


In [3]:
from sklearn.datasets import fetch_openml

# Download the "ionosphere" data set from OpenML as plain NumPy arrays
# (as_frame=False) rather than pandas objects.
X, y = fetch_openml(name="ionosphere", return_X_y=True, as_frame=False)

# Encode the string labels as an integer column vector:
# 'g' (good) -> 1, every other label (bad) -> 0.
y = (y == 'g').astype(int).reshape(-1, 1)


In [4]:
# Size the network to the number of feature columns, use 16 hidden units,
# and fit with the default epoch count and batch size.
model = LogisticRegression(input_dim=X.shape[1], hidden_dim=16)
model.train(X, y)

# Accuracy is measured on the same data the model was trained on
# (there is no held-out split here).
y_pred = model.predict(X)
accuracy = (y_pred == y).mean()
print(f"Accuracy: {accuracy}")



Epoch 0, Loss: 0.8350148518967819
Epoch 10, Loss: 0.6224704277323438
Epoch 20, Loss: 0.5614645471479323
Epoch 30, Loss: 0.5217776985793212
Epoch 40, Loss: 0.486955207620354
Epoch 50, Loss: 0.4511582618934919
Epoch 60, Loss: 0.4166471147038429
Epoch 70, Loss: 0.38566884439148996
Epoch 80, Loss: 0.3589761483878302
Epoch 90, Loss: 0.3358991446060723
Accuracy: 0.9002849002849003
