In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.optimize import minimize
from functools import partial

%matplotlib inline

In [35]:
np.random.seed(42)

n_samples = 100000
n_dims = 10

X = np.random.randn(n_samples, n_dims)

beta_true = np.asarray([0]*(n_dims // 2) + [1] * (n_dims - n_dims//2))

# Generate target variable
y = np.random.binomial(1, 1 / (1 + np.exp(-X.dot(beta_true))))

# Split the data into training and testing sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.5, random_state=42)
X_train_labeled, X_train_unlabeled, y_train_labeled, _ = train_test_split(X_train, y_train, test_size=0.5, random_state=42)
X_valid, X_holdout, y_valid, y_holdout = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [36]:
def log1pexp(x):
    """
    Numerically accurate evaluation of log(1 + exp(x)).
    
    This function computes log(1 + exp(x)) in a way that is numerically stable
    for both large positive and large negative values of x.
    
    Args:
    x (np.ndarray): Input array
    
    Returns:
    np.ndarray: log(1 + exp(x)) computed in a numerically stable way
    """
    # For large positive x, log(1 + exp(x)) ≈ x
    # For x near zero, we use the direct computation
    # For large negative x, we use exp(x) to avoid overflow
    threshold = np.log(np.finfo(x.dtype).max) - 1e-5
    
    result = np.where(
        x > threshold,
        x,
        np.where(
            x > -threshold,
            np.log1p(np.exp(-np.abs(x))) + np.maximum(x, 0),
            np.exp(x)
        )
    )
    
    return result

In [37]:
def sigmoid(x):
    """
    Numerically stable sigmoid function.

    This function computes the sigmoid of x in a way that is numerically stable
    for both large positive and large negative values of x.

    Args:
    x (np.ndarray): Input array

    Returns:
    np.ndarray: Sigmoid of x computed in a numerically stable way
    """
    # For large negative x, sigmoid(x) ≈ exp(x)
    # For large positive x, sigmoid(x) ≈ 1 - exp(-x)
    # For x near zero, we use the standard formula
    
    mask = x >= 0
    z = np.exp(-np.abs(x))
    
    return np.where(mask, 1 / (1 + z), z / (1 + z))

In [38]:
def logistic_loss(X, y, temp, beta):
    # X: [n, d], beta: [d]
    z = X @ beta / temp
    loss = np.mean(-y * z + log1pexp(z))
    return loss

In [39]:
def grad_logistic_loss(X, y, temp, beta):
    """
    Calculate the gradient of logistic loss with respect to beta.
    
    Args:
    X (np.ndarray): Feature matrix (n_features, n_samples)
    y (np.ndarray): Target vector (n_samples,)
    beta (np.ndarray): Coefficient vector (n_features,)
    
    Returns:
    np.ndarray: Gradient vector (n_features,)
    """
    z = (X @ beta) / temp
    sigmoid_z = sigmoid(z)
    grad_loss = (X.T @ (sigmoid_z - y)) / len(y)
    return grad_loss

In [40]:
def optimize_logistic_regression(X, y, temp: float=1):
    """
    Optimize the logistic regression parameters using L-BFGS.
    
    Args:
    X (np.array): Feature matrix (N x D)
    y (np.array): True binary labels (N,)
    
    Returns:
    np.array: Optimized parameters
    """
    # Initialize beta (parameters)
    initial_beta = np.zeros(X.shape[1])
    loss_func = partial(logistic_loss, X, y, temp)
    grad_func = partial(grad_logistic_loss, X, y, temp)
    print(initial_beta.shape)
    # Optimize using L-BFGS
    result = minimize(
        fun=loss_func,
        x0=initial_beta,
        method='BFGS',
        jac=grad_func,
        options={'disp': True}
    )

    return result.x

In [41]:
X_train @ np.zeros(X.shape[1])

array([0., 0., 0., ..., 0., 0., 0.])

In [42]:
beta_hat = optimize_logistic_regression(X_holdout, y_holdout)

(10,)
Optimization terminated successfully.
         Current function value: 0.428249
         Iterations: 11
         Function evaluations: 12
         Gradient evaluations: 12


In [43]:
beta_hat

array([ 0.0042406 ,  0.02427705, -0.00460026, -0.02401726,  0.02127088,
        1.00630862,  1.03084618,  1.01491724,  1.05909306,  1.00742735])

In [44]:
beta_true

array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

In [45]:
def ppi_logistic_loss(X_labeled, y_labeled, X_unlabeled, beta_hat, temp, beta):
    loss_unlabeled = logistic_loss(X_unlabeled, X_unlabeled @ beta_hat, temp, beta)
    loss_correction = logistic_loss(X_labeled, y_labeled, temp, beta) - logistic_loss(X_labeled, X_labeled @ beta_hat, temp, beta)
    return loss_unlabeled - loss_correction

In [46]:
def grad_ppi_logistic_loss(X_labeled, y_labeled, X_unlabeled, beta_hat, temp, beta):
    grad_unlabeled = grad_logistic_loss(X_unlabeled, X_unlabeled @ beta_hat, temp, beta)
    grad_correction = grad_logistic_loss(X_labeled, y_labeled, temp, beta) - grad_logistic_loss(X_labeled, X_labeled @ beta_hat, temp, beta)
    return grad_unlabeled + grad_correction

In [47]:
def optimize_ppi_logistic_regression(X_labeled, y_labeled, X_unlabeled, temp: float=1):
    """
    Optimize the logistic regression parameters using L-BFGS.
    
    Args:
    X_labeled (np.array): Labeled feature matrix
    y_labeled (np.array): Labels for labeled data
    X_unlabeled (np.array): Unlabeled feature matrix
    temp (float): Temperature parameter
    
    Returns:
    np.array: Optimized parameters
    """
    # Initialize beta (parameters)
    initial_beta = np.zeros(X_labeled.shape[1])

    beta_hat = optimize_logistic_regression(X_holdout, y_holdout, temp) # Only use once, then discard this part of data

    loss_func = partial(ppi_logistic_loss, X_labeled, y_labeled, X_unlabeled, beta_hat, temp)
    grad_func = partial(grad_ppi_logistic_loss, X_labeled, y_labeled, X_unlabeled, beta_hat, temp)
    
    # Optimize using BFGS
    result = minimize(
        fun=loss_func,
        x0=initial_beta,
        method='BFGS',
        jac=grad_func,
        options={'disp': True}
    )

    return result.x

In [48]:
beta_ppi = optimize_ppi_logistic_regression(X_train_labeled, y_train_labeled, X_train_unlabeled)

(10,)
Optimization terminated successfully.
         Current function value: 0.428249
         Iterations: 11
         Function evaluations: 12
         Gradient evaluations: 12
         Current function value: -13.247071
         Iterations: 10
         Function evaluations: 75
         Gradient evaluations: 63


  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)


In [49]:
beta_ppi

array([ 0.12192149,  0.07294354, -0.00975373, -0.03227265,  0.10915894,
        1.83538716,  1.36334533,  1.38233433,  1.66854118,  1.63094913])