In [1]:
import numpy as np

class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    The model is ``p(y = 1 | x) = sigmoid(w . x + b)``. ``fit`` follows the
    gradient of the mean cross-entropy loss for a fixed number of steps.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        """
        :param learning_rate: step size applied to every gradient update
        :param num_iterations: number of gradient-descent iterations to run
        """
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.w = None  # weight vector, shape (n,); assigned by fit()
        self.b = 0.0   # bias term

    def sigmoid(self, z):
        """
        Logistic function, mapping ``z`` element-wise into (0, 1).

        ``z`` is clipped to [-700, 700] before exponentiation so that
        ``np.exp`` cannot overflow float64 for very negative inputs. Inside
        that range the result is identical to ``1 / (1 + exp(-z))``.
        """
        z = np.clip(z, -700, 700)
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """
        Train the model with batch gradient descent.

        :param X: training features, shape (m, n)
        :param y: labels in {0, 1}, shape (m,); a column vector of shape
                  (m, 1) is accepted and flattened
        :return: self, so calls can be chained
        :raises ValueError: if X is not 2-D or y does not hold m labels
        """
        X = np.asarray(X, dtype=float)
        # Flatten y: an (m, 1) column would otherwise broadcast (A - y) to
        # an (m, m) matrix and corrupt the gradient.
        y = np.asarray(y, dtype=float).ravel()
        m, n = X.shape
        if y.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y has {y.shape[0]} labels"
            )

        # Start every fit from zero parameters.
        self.w = np.zeros(n)
        self.b = 0.0

        for _ in range(self.num_iterations):
            # Forward pass: linear score, then predicted probability.
            z = np.dot(X, self.w) + self.b
            A = self.sigmoid(z)

            # The cross-entropy loss itself is not needed for the update;
            # only its gradient is computed:
            #   dw = (1/m) * X^T (A - y),   db = (1/m) * sum(A - y)
            dw = (1/m) * np.dot(X.T, (A - y))
            db = (1/m) * np.sum(A - y)

            # Gradient-descent step.
            self.w -= self.learning_rate * dw
            self.b -= self.learning_rate * db

        return self

    def predict_proba(self, X):
        """
        Return the predicted probability of class 1 for each row of X.

        :param X: data to score, shape (m, n)
        :return: probabilities, shape (m,)
        """
        z = np.dot(X, self.w) + self.b
        return self.sigmoid(z)

    def predict(self, X, threshold=0.5):
        """
        Return hard 0/1 predictions.

        :param X: data to score, shape (m, n)
        :param threshold: probability at or above which class 1 is predicted
        :return: integer array of 0s and 1s, shape (m,)
        """
        proba = self.predict_proba(X)
        return (proba >= threshold).astype(int)


# ====================
# 以下是一个简单的使用示例
# ====================
if __name__ == "__main__":
    # Tiny hand-made dataset with two features (x1, x2) per sample.
    X = np.array([
        (0.50, 1.0),
        (1.50, 2.0),
        (3.00, 6.0),
        (4.00, 6.5),
        (4.50, 6.0),
        (5.00, 7.0),
        (6.00, 8.0),
        (7.00, 8.5),
    ])
    # First four samples are class 0, last four are class 1.
    y = np.array([0] * 4 + [1] * 4)

    # Build the model and train it in one chained call (fit returns self).
    model = LogisticRegressionScratch(
        learning_rate=0.1,
        num_iterations=1000,
    ).fit(X, y)

    # Show the learned parameters.
    print("Learned weights (w):", model.w)
    print("Learned bias (b):", model.b)

    # Score two unseen points: the first is expected to fall in class 0,
    # the second in class 1.
    X_test = np.array([
        (2.0, 4.0),
        (6.5, 8.0),
    ])
    preds = model.predict(X_test)
    print("Predictions on X_test:", preds)


Learned weights (w): [ 4.1477231  -2.13970423]
Learned bias (b): -3.8604997547629063
Predictions on X_test: [0 1]


In [1]:
import numpy as np

def sigmoid(z):
    """
    Logistic function: map real values element-wise into (0, 1).

    Used to turn a linear score into a probability for binary classification.
    ``z`` is clipped to [-700, 700] before exponentiation so ``np.exp``
    cannot overflow float64 for very negative inputs; inside that range the
    result is identical to ``1 / (1 + exp(-z))``.
    """
    z = np.clip(z, -700, 700)
    return 1 / (1 + np.exp(-z))

def cross_entropy_loss(y_true, y_pred, eps=1e-15):
    """
    Mean binary cross-entropy loss.

        L = -(1/N) * sum( y * log(p) + (1 - y) * log(1 - p) )

    y_true: true labels, shape = (n_samples, )
    y_pred: predicted probabilities, shape = (n_samples, )
    eps: clipping margin that keeps probabilities away from 0 and 1
    """
    # Clip so that neither log() below is ever evaluated at 0.
    probs = np.clip(y_pred, eps, 1 - eps)
    sample_count = len(y_true)
    positive_term = y_true * np.log(probs)
    negative_term = (1 - y_true) * np.log(1 - probs)
    total = np.sum(positive_term + negative_term)
    return -total / sample_count

def cross_entropy_gradient(X, y_true, y_pred):
    """
    Gradient of the mean cross-entropy loss with respect to the weights w.

    Each sample contributes (y_pred[i] - y_true[i]) * X[i]; the contributions
    are averaged over all samples.

    X: feature matrix, shape = (n_samples, n_features)
    y_true: true labels, shape = (n_samples, )
    y_pred: predicted probabilities, shape = (n_samples, )
    return: gradient of shape (n_features, )
    """
    sample_count = len(y_true)
    residual = y_pred - y_true
    return np.dot(X.T, residual) / sample_count

def mse_loss(y_true, y_pred):
    """
    Mean squared error (MSE) loss: the average of (y_pred - y_true) ** 2.

    y_true: true values, array-like
    y_pred: predicted values, array-like broadcastable against y_true
    """
    # Coerce to arrays so plain Python lists and scalars are accepted too.
    diff = np.asarray(y_pred, dtype=float) - np.asarray(y_true, dtype=float)
    return np.mean(diff ** 2)

def mse_gradient(X, y_true, y_pred):
    """
    Gradient of the MSE loss with respect to the weights w, for a model whose
    output is y_pred = sigmoid(Xw).

    By the chain rule the sigmoid derivative y_pred * (1 - y_pred) appears as
    an extra factor, which is why this differs from the cross-entropy case:

        grad = (2/N) * sum( (y_pred - y_true) * y_pred * (1 - y_pred) * X )
    """
    sample_count = len(y_true)
    error = y_pred - y_true
    # Error scaled by d(sigmoid)/dz for each sample.
    scaled_error = error * y_pred * (1 - y_pred)
    factor = 2 / sample_count
    return np.dot(X.T, scaled_error) * factor

def hinge_loss(y_true, y_pred, eps=1e-15):
    """
    Mean hinge loss, the loss usually associated with support vector machines.

        L = (1/N) * sum( max(0, 1 - y * z) ),  with y in {-1, 1}

    Labels arrive as {0, 1} and are mapped to {-1, 1} here. The linear score
    z is not passed in; for demonstration it is recovered from the predicted
    probability through the logit, z = log(p / (1 - p)).

    y_true: true labels in {0, 1}
    y_pred: predicted probabilities
    eps: clipping margin that keeps the logit finite
    """
    # Map {0, 1} labels to {-1, 1}.
    signed_labels = np.where(y_true > 0.5, 1, -1)

    # Recover the score from the probability; clipping avoids dividing by 0
    # and taking log(0).
    probs = np.clip(y_pred, eps, 1 - eps)
    odds = probs / (1 - probs)
    margins = signed_labels * np.log(odds)

    return np.mean(np.maximum(0, 1 - margins))

def hinge_gradient(X, y_true, y_pred, eps=1e-15):
    """
    (Sub)gradient of the mean hinge loss with respect to the weights w.

    For one sample the loss is max(0, 1 - y * z) with y in {-1, 1}. Where
    y * z < 1 the gradient is -y * X[i]; elsewhere it is 0. The per-sample
    gradients are averaged over all samples.

    X: feature matrix, shape = (n_samples, n_features)
    y_true: true labels in {0, 1}, shape = (n_samples, )
    y_pred: predicted probabilities, shape = (n_samples, )
    eps: clipping margin that keeps the recovered logit finite
    return: gradient of shape (n_features, )
    """
    # Map {0, 1} labels to {-1, 1}.
    y_signed = np.where(y_true > 0.5, 1, -1)

    # Recover the linear score z from the probability via the logit.
    probs = np.clip(y_pred, eps, 1 - eps)
    z = np.log(probs / (1 - probs))

    # Only samples inside the margin (y * z < 1) contribute -y * X[i].
    inside_margin = (y_signed * z) < 1
    coeff = np.where(inside_margin, -y_signed, 0)

    # One matrix-vector product replaces the per-sample Python loop.
    return np.dot(X.T, coeff) / X.shape[0]


class LogisticRegression:
    """Logistic regression (no bias term) with a selectable training loss."""

    def __init__(self, learning_rate=0.1, max_iter=1000, loss_type='cross_entropy'):
        """
        learning_rate: step size for each gradient-descent update
        max_iter: number of gradient-descent iterations
        loss_type: loss to minimise, one of 'cross_entropy', 'mse', 'hinge'
        """
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.loss_type = loss_type
        self.w = None  # weight vector; assigned by fit()

    def fit(self, X, y):
        """
        Fit the weight vector w by gradient descent.

        Prints the current loss every 100 iterations.

        X: training features, shape = (n_samples, n_features)
        y: training labels in {0, 1}, shape = (n_samples, )
        return: self, so calls can be chained
        raises ValueError: if loss_type is not a supported loss name
        """
        # Resolve the loss once, before training, so an unsupported name is
        # rejected even when max_iter is 0.
        if self.loss_type == 'cross_entropy':
            loss_fn, grad_fn = cross_entropy_loss, cross_entropy_gradient
        elif self.loss_type == 'mse':
            loss_fn, grad_fn = mse_loss, mse_gradient
        elif self.loss_type == 'hinge':
            loss_fn, grad_fn = hinge_loss, hinge_gradient
        else:
            raise ValueError(f"未知的损失函数类型：{self.loss_type}")

        _, n_features = X.shape
        # Start every fit from zero weights.
        self.w = np.zeros(n_features)

        for i in range(self.max_iter):
            # Forward pass: linear score, then predicted probability.
            z = np.dot(X, self.w)
            y_pred = sigmoid(z)

            loss = loss_fn(y, y_pred)
            grad = grad_fn(X, y, y_pred)

            # Gradient-descent step.
            self.w -= self.learning_rate * grad

            if i % 100 == 0:
                print(f"迭代 {i}: loss = {loss:.5f}")

        return self

    def predict_proba(self, X):
        """
        Return the predicted probability of the positive class (1).
        """
        z = np.dot(X, self.w)
        return sigmoid(z)

    def predict(self, X, threshold=0.5):
        """
        Return predicted labels (0 or 1): 1 where the predicted probability
        is at or above threshold.
        """
        return (self.predict_proba(X) >= threshold).astype(int)

# ------------------ 测试示例 ------------------

if __name__ == '__main__':
    # Synthetic binary classification task: 100 samples, 2 features,
    # generated with a fixed seed for reproducibility.
    np.random.seed(42)
    X = np.random.randn(100, 2)

    # Labels come from a known linear rule with w_true = [2, -1] and no bias:
    # a sample is positive (1) when its score is above 0, otherwise 0.
    w_true = np.array([2.0, -1.0])
    z_true = X.dot(w_true)
    y = (z_true > 0).astype(int)

    # --- cross-entropy loss ---
    print("=== 使用交叉熵损失训练 ===")
    model_ce = LogisticRegression(
        learning_rate=0.1, max_iter=1000, loss_type='cross_entropy'
    )
    model_ce.fit(X, y)
    print("训练结束。最终参数 w =", model_ce.w)
    acc_ce = (model_ce.predict(X) == y).mean()
    print(f"在训练集上的准确率: {acc_ce:.3f}\n")

    # --- mean squared error loss ---
    print("=== 使用均方误差损失训练 ===")
    model_mse = LogisticRegression(
        learning_rate=0.1, max_iter=1000, loss_type='mse'
    )
    model_mse.fit(X, y)
    print("训练结束。最终参数 w =", model_mse.w)
    acc_mse = (model_mse.predict(X) == y).mean()
    print(f"在训练集上的准确率: {acc_mse:.3f}\n")

    # --- hinge loss ---
    print("=== 使用hinge loss训练 ===")
    model_hinge = LogisticRegression(
        learning_rate=0.1, max_iter=1000, loss_type='hinge'
    )
    model_hinge.fit(X, y)
    print("训练结束。最终参数 w =", model_hinge.w)
    acc_hinge = (model_hinge.predict(X) == y).mean()
    print(f"在训练集上的准确率: {acc_hinge:.3f}")


=== 使用交叉熵损失训练 ===
迭代 0: loss = 0.69315
迭代 100: loss = 0.29066
迭代 200: loss = 0.21870
迭代 300: loss = 0.18497
迭代 400: loss = 0.16433
迭代 500: loss = 0.15001
迭代 600: loss = 0.13931
迭代 700: loss = 0.13091
迭代 800: loss = 0.12408
迭代 900: loss = 0.11838
训练结束。最终参数 w = [ 4.76808035 -2.67622331]
在训练集上的准确率: 0.990

=== 使用均方误差损失训练 ===
迭代 0: loss = 0.25000
迭代 100: loss = 0.11400
迭代 200: loss = 0.08494
迭代 300: loss = 0.07149
迭代 400: loss = 0.06332
迭代 500: loss = 0.05766
迭代 600: loss = 0.05345
迭代 700: loss = 0.05015
迭代 800: loss = 0.04748
迭代 900: loss = 0.04525
训练结束。最终参数 w = [ 2.93018933 -1.67307736]
在训练集上的准确率: 0.990

=== 使用hinge loss训练 ===
迭代 0: loss = 1.00000
迭代 100: loss = 0.16382
迭代 200: loss = 0.12763
迭代 300: loss = 0.11155
迭代 400: loss = 0.10343
迭代 500: loss = 0.09729
迭代 600: loss = 0.09221
迭代 700: loss = 0.08812
迭代 800: loss = 0.08452
迭代 900: loss = 0.08156
训练结束。最终参数 w = [ 4.05079356 -2.37794326]
在训练集上的准确率: 0.990
