In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.optimize import minimize
from functools import partial

%matplotlib inline

In [2]:
np.random.seed(42)

n_samples = 10000
n_dims = 10

X = np.random.randn(n_samples, n_dims)

beta_true = np.asarray([0]*(n_dims // 2) + [1] * (n_dims - n_dims//2))

# Generate target variable
y = np.random.binomial(1, 1 / (1 + np.exp(-X.dot(beta_true))))

# Split the data into training and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.5, random_state=42)
X_train_labeled, X_train_unlabeled, y_train_labeled, _ = train_test_split(X_train, y_train, test_size=0.5, random_state=42)
X_valid, X_holdout, y_valid, y_holdout = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [3]:
def log1pexp(x):
    """
    Numerically accurate evaluation of log(1 + exp(x)).
    
    This function computes log(1 + exp(x)) in a way that is numerically stable
    for both large positive and large negative values of x.
    
    Args:
    x (np.ndarray): Input array
    
    Returns:
    np.ndarray: log(1 + exp(x)) computed in a numerically stable way
    """
    # For large positive x, log(1 + exp(x)) ≈ x
    # For x near zero, we use the direct computation
    # For large negative x, we use exp(x) to avoid overflow
    threshold = np.log(np.finfo(x.dtype).max) - 1e-5
    
    result = np.where(
        x > threshold,
        x,
        np.where(
            x > -threshold,
            np.log1p(np.exp(-np.abs(x))) + np.maximum(x, 0),
            np.exp(x)
        )
    )
    
    return result

In [4]:
def sigmoid(x):
    """
    Numerically stable sigmoid function.

    This function computes the sigmoid of x in a way that is numerically stable
    for both large positive and large negative values of x.

    Args:
    x (np.ndarray): Input array

    Returns:
    np.ndarray: Sigmoid of x computed in a numerically stable way
    """
    # For large negative x, sigmoid(x) ≈ exp(x)
    # For large positive x, sigmoid(x) ≈ 1 - exp(-x)
    # For x near zero, we use the standard formula
    
    mask = x >= 0
    z = np.exp(-np.abs(x))
    
    return np.where(mask, 1 / (1 + z), z / (1 + z))

In [5]:
def logistic_loss(X, y, temp, beta):
    # X: [n, d], beta: [d]
    z = X @ beta / temp
    loss = np.mean(-y * z + log1pexp(z))
    return loss

In [6]:
def grad_logistic_loss(X, y, temp, beta):
    """
    Calculate the gradient of logistic loss with respect to beta.
    
    Args:
    X (np.ndarray): Feature matrix (n_features, n_samples)
    y (np.ndarray): Target vector (n_samples,)
    beta (np.ndarray): Coefficient vector (n_features,)
    
    Returns:
    np.ndarray: Gradient vector (n_features,)
    """
    z = (X @ beta) / temp
    sigmoid_z = sigmoid(z)
    grad_loss = (X.T @ (sigmoid_z - y)) / len(y)
    return grad_loss

In [7]:
def optimize_logistic_regression(X, y, temp: float=1):
    """
    Optimize the logistic regression parameters using L-BFGS.
    
    Args:
    X (np.array): Feature matrix (N x D)
    y (np.array): True binary labels (N,)
    
    Returns:
    np.array: Optimized parameters
    """
    # Initialize beta (parameters)
    initial_beta = np.zeros(X.shape[1])
    loss_func = partial(logistic_loss, X, y, temp)
    grad_func = partial(grad_logistic_loss, X, y, temp)
    print(initial_beta.shape)
    # Optimize using L-BFGS
    result = minimize(
        fun=loss_func,
        x0=initial_beta,
        method='BFGS',
        jac=grad_func,
        options={'disp': True}
    )

    return result.x

In [8]:
X_train @ np.zeros(X.shape[1])

array([0., 0., 0., ..., 0., 0., 0.])

In [9]:
beta_hat = optimize_logistic_regression(X_holdout, y_holdout)

(10,)
Optimization terminated successfully.
         Current function value: 0.428228
         Iterations: 15
         Function evaluations: 16
         Gradient evaluations: 16


In [10]:
beta_hat

array([ 0.03632113, -0.04375023, -0.0967291 ,  0.00931281,  0.0899625 ,
        1.07567776,  1.00714916,  0.89664905,  1.01231026,  0.96359944])

In [11]:
beta_true

array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

In [12]:
def ppi_logistic_loss(X_labeled, y_labeled, X_unlabeled, beta_hat, temp, beta):
    loss_unlabeled = logistic_loss(X_unlabeled, X_unlabeled @ beta_hat, temp, beta)
    loss_correction = logistic_loss(X_labeled, y_labeled, temp, beta) - logistic_loss(X_labeled, X_labeled @ beta_hat, temp, beta)
    return loss_unlabeled - loss_correction

In [13]:
def grad_ppi_logistic_loss(X_labeled, y_labeled, X_unlabeled, beta_hat, temp, beta):
    grad_unlabeled = grad_logistic_loss(X_unlabeled, X_unlabeled @ beta_hat, temp, beta)
    grad_correction = grad_logistic_loss(X_labeled, y_labeled, temp, beta) - grad_logistic_loss(X_labeled, X_labeled @ beta_hat, temp, beta)
    return grad_unlabeled + grad_correction

In [14]:
def optimize_ppi_logistic_regression(X_labeled, y_labeled, X_unlabeled, temp: float=1):
    """
    Optimize the logistic regression parameters using L-BFGS.
    
    Args:
    X_labeled (np.array): Labeled feature matrix
    y_labeled (np.array): Labels for labeled data
    X_unlabeled (np.array): Unlabeled feature matrix
    temp (float): Temperature parameter
    
    Returns:
    np.array: Optimized parameters
    """
    # Initialize beta (parameters)
    initial_beta = np.zeros(X_labeled.shape[1])

    beta_hat = optimize_logistic_regression(X_holdout, y_holdout, temp) # Only use once, then discard this part of data

    loss_func = partial(ppi_logistic_loss, X_labeled, y_labeled, X_unlabeled, beta_hat, temp)
    grad_func = partial(grad_ppi_logistic_loss, X_labeled, y_labeled, X_unlabeled, beta_hat, temp)
    
    # Optimize using BFGS
    result = minimize(
        fun=loss_func,
        x0=initial_beta,
        method='BFGS',
        jac=grad_func,
        options={'disp': True}
    )

    return result.x

In [15]:
beta_ppi = optimize_ppi_logistic_regression(X_train_labeled, y_train_labeled, X_train_unlabeled)

(10,)
Optimization terminated successfully.
         Current function value: 0.428228
         Iterations: 15
         Function evaluations: 16
         Gradient evaluations: 16
         Current function value: -5.224959
         Iterations: 7
         Function evaluations: 70
         Gradient evaluations: 58


  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)


In [16]:
beta_ppi

array([ 0.40178529,  0.01404516,  0.21538011, -0.22745011, -0.40105692,
        0.83671883,  0.98146178,  0.66464789,  0.2080279 ,  0.83749   ])