In [None]:
import numpy as np

In [None]:
def sigmoid(z):
    """Elementwise logistic function σ(z) = 1 / (1 + e^{-z}).

    Evaluated in an overflow-free form. With e = exp(-|z|), which always
    lies in [0, 1]:

        σ(z) = 1 / (1 + e)   for z >= 0
        σ(z) = e / (1 + e)   for z <  0

    Both branches are algebraically identical to 1 / (1 + e^{-z}) but never
    exponentiate a large positive number, so no overflow warning is raised
    for strongly negative inputs.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray with the shape of ``z``, values in [0, 1].
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))              # argument is <= 0, so exp cannot overflow
    numer = np.where(z >= 0, 1.0, e)    # pick the numerator of the matching branch
    return numer / (1.0 + e)

In [None]:
def value(A, b, x, lam=0.0):
    r"""
    Logistic regression objective with L2 penalty:

        f(x) = -∑_i [ b_i log(p_i) + (1 - b_i) log(1 - p_i) ] + (lam/2) ||x||^2

    where p_i = sigmoid(a_i^T x),  b_i ∈ {0,1}.

    The negative log-likelihood is evaluated through the exact identity

        -[ b log(p) + (1 - b) log(1 - p) ] = log(1 + e^z) - b z,   z = a^T x,

    using ``np.logaddexp(0, z)`` for log(1 + e^z). This avoids taking
    log(0) for saturated samples without adding an epsilon inside the
    logarithm, so the value stays consistent with the analytic gradient
    A^T (p - b) + lam * x even when |z| is large.

    Parameters
    ----------
    A : array_like, shape (n, d)
        Feature matrix, one sample per row.
    b : array_like, shape (n,)
        Labels.
    x : array_like, shape (d,)
        Weight vector.
    lam : float, optional
        L2 regularization strength (default 0.0).

    Returns
    -------
    float
        Objective value f(x).
    """
    A = np.asarray(A, dtype=float)
    b = np.asarray(b, dtype=float)
    x = np.asarray(x, dtype=float)

    z = A @ x                                   # linear scores a_i^T x

    # log(1 + e^z) computed stably for both large positive and negative z
    nll = (np.logaddexp(0.0, z) - b * z).sum()
    reg = 0.5 * lam * (x @ x)

    return nll + reg

In [None]:
def residual(A, b, x):
    """
    Residual of the linear system: r(x) = A x - b.

    The inputs are used as given (no array conversion), so they must
    already support the ``@`` and ``-`` operators.
    """
    Ax = A @ x
    return Ax - b

In [None]:
def gradient(A, b, x, lam=0.0):
    r"""
    Gradient of the L2-regularized logistic objective.

        ∇f(x) = A^T (p - b) + lam * x,    p = sigmoid(A x),  b ∈ {0,1}

    A, b and x are converted to float arrays before use; ``lam`` is the
    L2 penalty weight (default 0.0).
    """
    A, b, x = (np.asarray(arr, dtype=float) for arr in (A, b, x))

    probs = sigmoid(A @ x)            # predicted probabilities
    data_term = A.T @ (probs - b)     # gradient of the negative log-likelihood

    return data_term + lam * x

In [None]:
def steepestDescentLogReg(
        A, b, x0, lam=0.0, mode="armijo", alpha_fixed=1e-2,
        armijo_c=1e-4, rho=0.5, tol=1e-8, maxit=10_000
):
    """
    Steepest descent for logistic regression with L2 penalty.

    Minimizes
        f(x) = -∑ [b_i log p_i + (1 - b_i) log(1 - p_i)] + (lam/2)||x||^2,
    where p = sigmoid(Ax), b ∈ {0,1}.

    mode: "fixed" | "armijo"
      - "fixed"  : use a constant step size alpha_fixed.
      - "armijo" : backtracking line search with Armijo condition.

    Parameters
    ----------
    A, b : array_like
        Data passed through to ``value`` and ``gradient``.
    x0 : array_like
        Starting point; it is copied, never modified.
    lam : float
        L2 regularization strength.
    alpha_fixed : float
        Step size used when ``mode == "fixed"``.
    armijo_c : float
        Sufficient-decrease constant c in the Armijo condition.
    rho : float
        Factor by which the trial step is multiplied on each backtrack.
    tol : float
        Stop when the gradient norm drops below this value.
    maxit : int
        Maximum number of outer iterations.

    Returns
    -------
    (x, fx) : the final iterate and its objective value.

    Raises
    ------
    ValueError
        If ``mode`` is neither "fixed" nor "armijo". The check runs before
        any iteration, so an invalid mode is reported even when ``x0`` is
        already stationary or ``maxit`` is 0.

    Notes
    -----
    In "armijo" mode, if the step size shrinks below 1e-16 without meeting
    the sufficient-decrease condition, the iteration stops and the current
    iterate is returned unchanged (no non-descent step is taken).
    """
    if mode not in ("fixed", "armijo"):
        raise ValueError('mode must be "fixed" or "armijo"')

    A = np.asarray(A, dtype=float)
    b = np.asarray(b, dtype=float)
    x = np.asarray(x0, dtype=float).copy()

    fx = value(A, b, x, lam=lam)

    for _ in range(maxit):
        g = gradient(A, b, x, lam=lam)
        gnorm = np.linalg.norm(g)

        if gnorm < tol:
            break

        if mode == "fixed":
            x_new = x - alpha_fixed * g
            f_new = value(A, b, x_new, lam=lam)

        else:  # mode == "armijo"
            alpha = 1.0
            gnorm_sq = gnorm ** 2
            stalled = False
            while True:
                x_new = x - alpha * g
                f_new = value(A, b, x_new, lam=lam)
                # Armijo condition: f(x - αg) ≤ f(x) - c α ||g||²
                # (a NaN trial value fails this test and is rejected)
                if f_new <= fx - armijo_c * alpha * gnorm_sq:
                    break
                alpha *= rho
                if alpha < 1e-16:
                    # step size underflow; treat as convergence
                    stalled = True
                    break
            if stalled:
                # No acceptable step exists at working precision: keep the
                # current iterate and stop instead of looping until maxit.
                break

        # gradient step; f_new was already evaluated at x_new above
        x = x_new
        fx = f_new

    return x, fx