In [None]:
import numpy as np

class AUCROCSGDOptimizer:
    """
    Linear scoring model trained with SGD on a pairwise logistic AUC surrogate.

    Every iteration draws ``batch_size`` positive and ``batch_size`` negative
    rows (with replacement), pairs them by position and takes one gradient
    step on ``mean(log(1 + exp(-(x_pos - x_neg) @ w)))``, which pushes
    positive rows to score above negative rows.
    """

    def __init__(self, learning_rate=0.01, batch_size=32, n_iter=1000, seed=None):
        """
        Optimizer for AUC ROC using SGD.

        :param learning_rate: Learning rate for SGD.
        :param batch_size: Number of samples in each batch for generating pairs.
        :param n_iter: Number of iterations for training.
        :param seed: Random seed for reproducibility.
        """
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_iter = n_iter
        self.seed = seed
        self.weights = None
        self.rng = np.random.default_rng(seed)

    @staticmethod
    def _sigmoid(z):
        """
        Evaluate ``1 / (1 + exp(-z))`` without overflowing.

        :param z: Scalar or array of logits.
        :return: Logistic function of ``z``, element-wise.
        """
        z = np.asarray(z, dtype=float)
        # exp is only ever applied to a non-positive number, so it stays in
        # (0, 1] and cannot overflow whatever the magnitude of z is.
        decay = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
        return np.where(z >= 0, 1.0, decay) / (1.0 + decay)

    @staticmethod
    def _class_indices(y):
        """
        Split the sample indices by label.

        :param y: Target labels of shape (n_samples,), coded as 0 / 1.
        :return: Tuple ``(positive_indices, negative_indices)``.
        :raises ValueError: If either class has no samples, because no
            positive/negative pair can be formed in that case.
        """
        y = np.asarray(y)
        positive_indices = np.where(y == 1)[0]
        negative_indices = np.where(y == 0)[0]
        if positive_indices.size == 0 or negative_indices.size == 0:
            raise ValueError(
                "y must contain at least one positive (1) and one negative (0) "
                "sample to form pairs"
            )
        return positive_indices, negative_indices

    def _sample_pairs(self, positive_indices, negative_indices):
        """
        Draw one batch of positive and one batch of negative indices.

        :param positive_indices: Indices of the positive samples.
        :param negative_indices: Indices of the negative samples.
        :return: Two index arrays of length ``batch_size``.
        """
        # Sampling with replacement lets batch_size exceed the class size.
        pos_batch = self.rng.choice(positive_indices, self.batch_size, replace=True)
        neg_batch = self.rng.choice(negative_indices, self.batch_size, replace=True)
        return pos_batch, neg_batch

    def _generate_pairs(self, X, y):
        """
        Generate positive and negative pairs for training.

        :param X: Input feature matrix of shape (n_samples, n_features).
        :param y: Target labels of shape (n_samples,).
        :return: Two sets of indices for positive and negative samples.
        :raises ValueError: If ``y`` lacks positive or negative samples.
        """
        positive_indices, negative_indices = self._class_indices(y)
        return self._sample_pairs(positive_indices, negative_indices)

    def fit(self, X, y):
        """
        Train the model using SGD to optimize AUC ROC.

        :param X: Input feature matrix of shape (n_samples, n_features).
        :param y: Target labels of shape (n_samples,).
        :return: The fitted optimizer (``self``), so calls can be chained.
        :raises ValueError: If ``y`` lacks positive or negative samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # The label split does not change between iterations; compute it once.
        positive_indices, negative_indices = self._class_indices(y)

        self.weights = np.zeros(X.shape[1])  # Initialize weights

        for _ in range(self.n_iter):
            pos_batch, neg_batch = self._sample_pairs(positive_indices, negative_indices)

            # Pairwise differences: row i is x_pos[i] - x_neg[i].
            margin = X[pos_batch] - X[neg_batch]
            logits = margin @ self.weights

            # d/dw log(1 + exp(-logit)) = -sigmoid(-logit) * margin
            pair_weight = self._sigmoid(-logits)
            gradient = -np.mean(margin * pair_weight[:, np.newaxis], axis=0)

            # Update weights
            self.weights -= self.learning_rate * gradient

        return self

    def predict_proba(self, X):
        """
        Predict probabilities for positive class.

        :param X: Input feature matrix of shape (n_samples, n_features).
        :return: Probabilities of shape (n_samples,).
        """
        logits = np.asarray(X) @ self.weights
        return self._sigmoid(logits)

    def predict(self, X):
        """
        Predict binary class labels.

        :param X: Input feature matrix of shape (n_samples, n_features).
        :return: Predicted class labels of shape (n_samples,).
        """
        probabilities = self.predict_proba(X)
        return (probabilities >= 0.5).astype(int)


In [None]:
import numpy as np
from sklearn.base import RegressorMixin

def sigmoid(x):
    """
    Evaluate the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The naive formula overflows in ``exp`` for large negative ``x``; here
    ``exp`` is only applied to ``-|x|``, which lies in (0, 1], so the result
    is computed without overflow for any magnitude of ``x``.

    :param x: Scalar or array-like of real values.
    :return: Logistic function of ``x`` with the same shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)

def sigmoid_derivative(x):
    """
    Evaluate the derivative of the logistic function at ``x``.

    Uses the identity ``s'(x) = s(x) * (1 - s(x))`` where ``s`` is ``sigmoid``.

    :param x: Scalar or array of real values.
    :return: Derivative of the logistic function at ``x``, element-wise.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

class AUCLogisticOptimizer(RegressorMixin):
    """
    Linear scoring model trained on a pairwise logistic AUC surrogate with
    momentum SGD.

    Each step samples ``batch_size`` positive and ``batch_size`` negative
    rows (with replacement), pairs them by position and follows the gradient
    of ``mean(log(1 + exp(-(x_pos - x_neg) @ W)))``.
    """

    # NOTE(review): predict() returns class labels, yet the class inherits
    # RegressorMixin. Presumably ClassifierMixin was intended; the base class
    # is left unchanged here because swapping it would alter what the
    # inherited score() computes for existing callers.

    def __init__(self, lr=0.01, momentum=1, delta_converged=1e-3, max_steps=1000,
                 batch_size=64, seed=None):
        """
        Store the hyper-parameters.

        :param lr: Learning rate applied to each mini-batch gradient.
        :param momentum: Factor multiplying the previous velocity at each
            step. With the default of 1 the velocity never decays, so every
            past gradient keeps contributing at full strength.
        :param delta_converged: Training stops once the norm of
            ``lr * gradient`` for the current batch falls below this value.
        :param max_steps: Maximum number of SGD steps.
        :param batch_size: Number of positive/negative pairs per step.
        :param seed: Optional seed for batch sampling. ``None`` (default)
            samples from NumPy's global random state, as before; any other
            value makes every ``fit`` call reproducible.
        """
        self.lr = lr
        self.momentum = momentum
        self.delta_converged = delta_converged
        self.max_steps = max_steps
        self.batch_size = batch_size
        self.seed = seed
        self.W = None  # Weight vector
        self.velocity = None  # For momentum

    def calculate_gradient(self, X1, Y1, X2, Y2):
        """
        Compute the gradient of the pairwise logistic loss for one batch.

        :param X1: Features of the positive samples, shape (batch, n_features).
        :param Y1: Labels of the positive samples (not used by the gradient).
        :param X2: Features of the negative samples, shape (batch, n_features).
        :param Y2: Labels of the negative samples (not used by the gradient).
        :return: Gradient with respect to ``W``, shape (n_features,).
        """
        pair_diff = X1 - X2
        # d/dW log(1 + exp(-d @ W)) = -sigmoid(-d @ W) * d for each pair d.
        pair_weight = sigmoid(-(pair_diff @ self.W))
        return -np.mean(pair_weight[:, np.newaxis] * pair_diff, axis=0)

    def fit(self, X, Y):
        """
        Fit the model to the data.

        :param X: Features (n_samples, n_features).
        :param Y: Labels (n_samples,), coded as 0 / 1.
        :return: The fitted optimizer (``self``), so calls can be chained.
        :raises ValueError: If ``Y`` lacks positive or negative samples.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)

        n_features = X.shape[1]
        self.W = np.zeros(n_features)
        self.velocity = np.zeros(n_features)

        # Separate positive and negative classes
        pos_indices = np.where(Y == 1)[0]
        neg_indices = np.where(Y == 0)[0]
        if pos_indices.size == 0 or neg_indices.size == 0:
            raise ValueError(
                "Y must contain at least one positive (1) and one negative (0) "
                "sample to form pairs"
            )

        # The np.random module and RandomState expose the same choice() API,
        # so the unseeded path keeps honouring a global np.random.seed().
        sampler = np.random if self.seed is None else np.random.RandomState(self.seed)

        for _ in range(self.max_steps):
            # Randomly sample positive and negative batches
            pos_batch = sampler.choice(pos_indices, size=self.batch_size, replace=True)
            neg_batch = sampler.choice(neg_indices, size=self.batch_size, replace=True)

            grad_W = self.calculate_gradient(
                X[pos_batch], Y[pos_batch], X[neg_batch], Y[neg_batch]
            )

            # Apply momentum and update weights
            self.velocity = self.momentum * self.velocity - self.lr * grad_W
            self.W += self.velocity

            # Check for convergence (based on the current mini-batch only)
            if np.linalg.norm(self.lr * grad_W) < self.delta_converged:
                break

        return self

    def _positive_proba(self, X):
        """Return the sigmoid of the linear score ``X @ W`` for every row."""
        return sigmoid(np.asarray(X) @ self.W)

    def predict(self, X):
        """
        Predict binary class labels.

        :param X: Features (n_samples, n_features).
        :return: Integer labels, 1 where the positive probability is >= 0.5.
        """
        return (self._positive_proba(X) >= 0.5).astype(int)

    def predict_proba(self, X):
        """
        Predict class probabilities.

        :param X: Features (n_samples, n_features).
        :return: Array of shape (n_samples, 2) with columns
            ``[P(negative), P(positive)]``.
        """
        probabilities = self._positive_proba(X)
        return np.column_stack([1 - probabilities, probabilities])

