创建演示数据：

In [1]:
import numpy as np
from sklearn.datasets import make_classification

# 1000 samples, two informative features, two classes; the seed is fixed so the data is reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=0,
)
# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1

演示数据：

In [2]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Split the samples by label; feature 0 goes on the x axis, feature 1 on the y axis.
positive, negative = X[y == 1], X[y == -1]
x1, y1 = positive[:, 0], positive[:, 1]
x2, y2 = negative[:, 0], negative[:, 1]
p1 = ax.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = ax.scatter(x2, y2, c='#457b9d', marker='x', s=20)

label_color = '#264653'
ax.set_title('对数几率回归', color=label_color)
ax.set_xlabel('X1', color=label_color)
ax.set_ylabel('X2', color=label_color)
ax.tick_params(labelcolor=label_color)
ax.legend([p1, p2], ["1", "-1"], loc="upper left")
plt.show()

<IPython.core.display.Javascript object>

使用 scikit-learn 拟合（无正则化）：

In [3]:
from sklearn.linear_model import LogisticRegression

# Logistic regression without regularisation.
# scikit-learn 1.2 deprecated the string "none" and 1.4 removed it;
# `penalty=None` is the supported spelling (requires scikit-learn >= 1.2).
reg = LogisticRegression(penalty=None)
# Fit the linear model
reg.fit(X, y)
# Weight coefficients
W = reg.coef_
# Intercept
b = reg.intercept_
print("W", W, "b", b)

W [[-0.52627229  4.15584315]] b [0.70189098]


使用 scikit-learn 拟合（L1正则化）：

In [4]:
from sklearn.linear_model import LogisticRegression

# L1-regularised logistic regression (C=0.01) using the liblinear (coordinate descent) solver.
# fit() returns the estimator, so construction and fitting are chained.
reg_l1 = LogisticRegression(penalty="l1", C=0.01, solver="liblinear").fit(X, y)
# Weight coefficients and intercept of the fitted model
W, b = reg_l1.coef_, reg_l1.intercept_
print("W", W, "b", b)

W [[0.         1.68135031]] b [0.]


使用 scikit-learn 拟合（L2正则化）：

In [5]:
from sklearn.linear_model import LogisticRegression

# L2-regularised logistic regression (C=0.01).
# fit() returns the estimator, so construction and fitting are chained.
reg_l2 = LogisticRegression(penalty="l2", C=0.01).fit(X, y)
# Weight coefficients and intercept of the fitted model
W, b = reg_l2.coef_, reg_l2.intercept_
print("W", W, "b", b)

W [[-0.02007655  1.38757284]] b [0.09382014]


使用 scikit-learn 拟合（弹性网络正则化）：

In [6]:
from sklearn.linear_model import LogisticRegression

# Elastic-net regularised logistic regression (C=0.01, equal L1/L2 mix) using the saga solver.
# fit() returns the estimator, so construction and fitting are chained.
reg_en = LogisticRegression(penalty="elasticnet", C=0.01, l1_ratio=0.5, solver="saga").fit(X, y)
# Weight coefficients and intercept of the fitted model
W, b = reg_en.coef_, reg_en.intercept_
print("W", W, "b", b)

W [[0.         1.48885033]] b [0.10301095]


对数几率回归正则化对比:

In [7]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, axs = plt.subplots(2, 2)
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=0.3)
# Row-major order: top-left, top-right, bottom-left, bottom-right.
ax1, ax2, ax3, ax4 = axs.ravel()

clist = ['#8ecae6', '#ffadad']
newcmp = LinearSegmentedColormap.from_list('point_color', clist)

# Dense grid covering the data with a 0.5 margin on every side.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, .01), np.arange(y_min, y_max, .01))
# The grid points are identical for every model, so build them once.
grid_points = np.c_[xx.ravel(), yy.ravel()]

x1 = X[y == 1][:, 0]
y1 = X[y == 1][:, 1]
x2 = X[y == -1][:, 0]
y2 = X[y == -1][:, 1]

panels = [
    (ax1, reg, '无正则化'),
    (ax2, reg_l1, 'L1正则化'),
    (ax3, reg_l2, 'L2正则化'),
    (ax4, reg_en, '弹性网络正则化'),
]
for panel_ax, model, title in panels:
    # Predicted label at every grid point, drawn as the background.
    Z = model.predict(grid_points).reshape(xx.shape)
    panel_ax.pcolormesh(xx, yy, Z, cmap=newcmp)
    panel_ax.scatter(x1, y1, c='#e63946', marker='o', s=1)
    panel_ax.scatter(x2, y2, c='#457b9d', marker='x', s=1)
    panel_ax.set_title(title, fontsize=10)
    # plt.xlim / plt.ylim act on the current axes only (the last subplot),
    # so the limits are set explicitly on every panel.
    panel_ax.set_xlim(xx.min(), xx.max())
    panel_ax.set_ylim(yy.min(), yy.max())

fig.suptitle('对数几率回归正则化对比', color='#264653')
plt.show()

<IPython.core.display.Javascript object>



对数几率回归假设函数：

In [8]:
import numpy as np

def hypothesis(X, W):
    """
    Hypothesis function of logistic regression.
    args:
        X - data set, one sample per row
        W - weight coefficients
    return:
        predicted labels: 1 where the sigmoid of X.dot(W) is at least 0.5, otherwise -1
    """
    scores = X.dot(W)
    probability = 1 / (1 + np.exp(-scores))
    # Build both masks from the raw probabilities before overwriting them in place.
    is_positive = probability >= 0.5
    is_negative = probability < 0.5
    probability[is_positive] = 1
    probability[is_negative] = -1
    return probability

对数几率回归，使用梯度下降法（gradient descent）:

In [9]:
import numpy as np

# Constants of the Wolfe conditions used by the line search:
# c_1 scales the bound in sufficientDecrease, c_2 scales the bound in curvature.
c_1 = 1e-4
c_2 = 0.9

def cost(X, y, W):
    """
    Cost function of logistic regression: the sum of log(1 + exp(-y * X.dot(W))).
    args:
        X - training data set
        y - target labels
        W - weight coefficients
    return:
        value of the cost function
    """
    exponent = -np.multiply(y, X.dot(W))
    non_positive = exponent[exponent <= 0]
    # Positive exponents are negated so np.exp never receives a large positive argument.
    negated_positive = -exponent[exponent > 0]
    direct_part = np.sum(np.log(1 + np.exp(non_positive)))
    # log(1 + e^t) = log(1 + e^(-t)) + t, evaluated with the negated exponent
    shifted_part = np.sum(np.log(1 + np.exp(negated_positive)) - negated_positive)
    return direct_part + shifted_part

def dcost(X, y, W):
    """
    Gradient of the logistic-regression cost function.
    args:
        X - training data set
        y - target labels
        W - weight coefficients
    return:
        gradient of the cost function with respect to W
    """
    margins = np.multiply(y, X.dot(W))
    # 1 / (1 + e^margin) is the per-sample factor multiplying -y * x in the gradient
    factors = 1 / (1 + np.exp(margins))
    return X.T.dot(np.multiply(-y, factors))

def direction(d):
    """
    Update direction for gradient descent.
    args:
        d - gradient
    return:
        the negated gradient
    """
    descent = -d
    return descent

def sufficientDecrease(X, y, W, p, step):
    """
    Check the sufficient decrease condition for a trial step.
    args:
        X - training data set
        y - target labels
        W - weight coefficients
        p - search direction
        step - step length
    return:
        True when the cost at W + step * p does not exceed
        cost(W) + c_1 * step * (p . gradient(W))
    """
    slope = p.T.dot(dcost(X, y, W))
    trial_cost = cost(X, y, W + step * p)
    upper_bound = cost(X, y, W) + c_1 * step * slope
    return trial_cost <= upper_bound

def curvature(X, y, W, p, step):
    """
    Check the curvature condition for a trial step.
    args:
        X - training data set
        y - target labels
        W - weight coefficients
        p - search direction
        step - step length
    return:
        True when -(p . gradient(W + step * p)) <= -c_2 * (p . gradient(W))
    """
    slope_at_start = p.T.dot(dcost(X, y, W))
    slope_at_trial = p.T.dot(dcost(X, y, W + step * p))
    return -slope_at_trial <= -c_2 * slope_at_start

def select(step_low, step_high):
    """
    Choose a step inside the given range; the midpoint is used.
    args:
        step_low - lower end of the step range
        step_high - upper end of the step range
    return:
        the chosen step
    """
    total = step_low + step_high
    return total / 2

def lineSearch(X, y, W, p, step_init, step_max):
    """
    Bisection line search for a step length that satisfies the Wolfe conditions.
    args:
        X - training data set
        y - target labels
        W - weight coefficients
        p - search direction
        step_init - initial step length (also the initial lower end of the bracket)
        step_max - maximum step length (the initial upper end of the bracket)
    return:
        a step satisfying both the sufficient decrease and the curvature condition;
        if the bracket can no longer be narrowed (the midpoint equals the current
        trial step) the lower end of the bracket is returned instead, because the
        search would otherwise repeat the same trial forever
    """
    step_i = step_init
    step_low = step_init
    step_high = step_max
    i = 1
    while True:
        # Sufficient decrease fails, or (after the first trial) the cost is not
        # below the cost at the lower end of the bracket.
        if (not sufficientDecrease(X, y, W, p, step_i)
                or (i > 1 and cost(X, y, W + step_i * p) >= cost(X, y, W + step_low * p))):
            # The current step becomes the right end of the bracket
            step_high = step_i
        else:
            # Sufficient decrease holds; with the curvature condition the Wolfe
            # conditions are met and the current step is returned.
            if curvature(X, y, W, p, step_i):
                return step_i
            step_low = step_i
        # Choose the next trial step
        next_step = select(step_low, step_high)
        if next_step == step_i:
            # No progress is possible: every later iteration would test the same step.
            return step_low
        step_i = next_step
        i = i + 1

def logisticRegressionGd(X, y, max_iter=1000, tol=1e-4, step_init=0, step_max=10):
    """
    Logistic regression fitted with gradient descent.
    args:
        X - training data set
        y - target labels
        max_iter - maximum number of iterations
        tol - tolerance on the L1 norm of the gradient
        step_init - initial step length passed to the line search
        step_max - maximum step length passed to the line search
    return:
        W - weight coefficients
    """
    # Start from the zero vector
    W = np.zeros(X.shape[1])
    for _ in range(max_iter):
        gradient = dcost(X, y, W)
        # Stop once the gradient is small enough
        if np.linalg.norm(x=gradient, ord=1) <= tol:
            break
        p = direction(gradient)
        # Step length chosen by the line search
        step = lineSearch(X, y, W, p, step_init, step_max)
        W = W + step * p
    return W

拟合演示数据：

In [10]:
import numpy as np

# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
W = logisticRegressionGd(X_b, y)
print(W)

[ 0.7018896  -0.5262708   4.15584056]


可视化：

In [11]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Dense grid covering the data with a 0.5 margin on every side.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, .01), np.arange(y_min, y_max, .01))
# Grid points with a leading column of ones, matching the layout of X_b.
grid_points = np.column_stack((np.ones(xx.size), xx.ravel(), yy.ravel()))
Z = hypothesis(grid_points, W).reshape(xx.shape)
clist = ['#8ecae6', '#ffadad']
newcmp = LinearSegmentedColormap.from_list('point_color', clist)
ax.pcolormesh(xx, yy, Z, cmap=newcmp)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())

positive, negative = X[y == 1], X[y == -1]
x1, y1 = positive[:, 0], positive[:, 1]
x2, y2 = negative[:, 0], negative[:, 1]
p1 = ax.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = ax.scatter(x2, y2, c='#457b9d', marker='x', s=20)

label_color = '#264653'
ax.set_title('对数几率回归-梯度下降法', color=label_color)
ax.set_xlabel('X1', color=label_color)
ax.set_ylabel('X2', color=label_color)
ax.tick_params(labelcolor=label_color)
ax.legend([p1, p2], ["1", "-1"], loc="upper left")
plt.show()

<IPython.core.display.Javascript object>



对数几率回归，使用牛顿法（newton's method）：

In [12]:
import numpy as np

def ddcost(X, y, W):
    """
    Hessian matrix of the logistic-regression cost function.
    args:
        X - training data set (one sample per row)
        y - target labels
        W - weight coefficients
    return:
        Hessian matrix of the cost function
    """
    margins = np.multiply(y, X.dot(W))
    # The per-sample weight e^z / (1 + e^z)^2 is an even function of z, so it is
    # evaluated at -|z|.  The direct form overflows for large positive margins and
    # produces inf * 0 = nan.
    decay = np.exp(-np.abs(margins))
    sample_weights = decay / np.square(1 + decay)
    # X^T diag(sample_weights) X, with the row scaling done by broadcasting
    return (X * sample_weights[:, np.newaxis]).T.dot(X)

def direction(d, H=None):
    """
    Update direction.
    args:
        d - gradient
        H - Hessian matrix; when omitted the steepest-descent direction -d is
            returned, so one-argument calls such as the one in
            logisticRegressionGd keep working after this definition replaces
            the earlier direction(d)
    return:
        update direction
    """
    if H is None:
        return -d
    # Solve H p = -d instead of forming the explicit inverse
    return -np.linalg.solve(H, d)

def logisticRegressionNewton(X, y, max_iter=1000, tol=1e-4, step_init=0, step_max=10):
    """
    Logistic regression fitted with Newton's method.
    args:
        X - training data set
        y - target labels
        max_iter - maximum number of iterations
        tol - tolerance on the norm of the gradient
        step_init - initial step length passed to the line search
        step_max - maximum step length passed to the line search
    return:
        W - weight coefficients
    """
    # Start from the zero vector
    W = np.zeros(X.shape[1])
    for _ in range(max_iter):
        gradient = dcost(X, y, W)
        hessian = ddcost(X, y, W)
        # Stop once the gradient is small enough
        if np.linalg.norm(gradient) <= tol:
            break
        p = direction(gradient, hessian)
        # Step length chosen by the line search
        step = lineSearch(X, y, W, p, step_init, step_max)
        W = W + step * p
    return W

拟合演示数据：

In [13]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
W = logisticRegressionNewton(X_b, y)
print(W)

[ 0.70189024 -0.52627055  4.15583967]


可视化：

In [14]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Dense grid covering the data with a 0.5 margin on every side.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, .01), np.arange(y_min, y_max, .01))
# Grid points with a leading column of ones, matching the layout of X_b.
grid_points = np.column_stack((np.ones(xx.size), xx.ravel(), yy.ravel()))
Z = hypothesis(grid_points, W).reshape(xx.shape)
clist = ['#8ecae6', '#ffadad']
newcmp = LinearSegmentedColormap.from_list('point_color', clist)
ax.pcolormesh(xx, yy, Z, cmap=newcmp)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())

positive, negative = X[y == 1], X[y == -1]
x1, y1 = positive[:, 0], positive[:, 1]
x2, y2 = negative[:, 0], negative[:, 1]
p1 = ax.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = ax.scatter(x2, y2, c='#457b9d', marker='x', s=20)

label_color = '#264653'
ax.set_title('对数几率回归-牛顿法', color=label_color)
ax.set_xlabel('X1', color=label_color)
ax.set_ylabel('X2', color=label_color)
ax.tick_params(labelcolor=label_color)
ax.legend([p1, p2], ["1", "-1"], loc="upper left")
plt.show()

<IPython.core.display.Javascript object>



随机梯度下降法（SGD）:

In [15]:
import numpy as np

def logisticRegressionSGD(X, y, max_iter=100, tol=1e-4, step=1e-1):
    """
    Logistic regression fitted with stochastic gradient descent.
    args:
        X - training data set
        y - target labels
        max_iter - number of passes over the shuffled data
        tol - a per-sample gradient norm at or below this value ends the current
              pass early; the remaining passes still run
        step - base step length, divided by sqrt(pass number) on every pass
    return:
        Ws - list of weight vectors: the initial zero vector followed by the
             weights after every update
    """
    n_samples = X.shape[0]
    W = np.zeros(X.shape[1])
    Ws = [W]
    # Features and label share one array so a single shuffle keeps them aligned.
    xy = np.c_[X.reshape(n_samples, -1), y.reshape(n_samples, 1)]
    for epoch in range(max_iter):
        s = step / (np.sqrt(epoch + 1))
        np.random.shuffle(xy)
        X_new, y_new = xy[:, :-1], xy[:, -1:].ravel()
        for sample, label in zip(X_new, y_new):
            d = dcost(sample, label, W)
            if np.linalg.norm(d) <= tol:
                break
            W = W - s * d
            Ws.append(W)
    return Ws

In [16]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionSGD(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.7052423  -0.45441383  4.08380597]


In [17]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

ax.plot(np.arange(len(Ws)), costs)

label_color = '#264653'
ax.set_title('对数几率回归-随机梯度下降法', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>

小批量随机梯度下降法（MBGD）:

In [18]:
import numpy as np

def logisticRegressionMBGD(X, y, batch_size=50, max_iter=100, tol=1e-4, step = 1e-1):
    """
    Logistic regression fitted with mini-batch gradient descent.
    args:
        X - training data set
        y - target labels
        batch_size - number of samples per mini-batch
        max_iter - number of passes over the shuffled data
        tol - a mini-batch gradient norm at or below this value ends the current
              pass early; the remaining passes still run
        step - base step length, divided by sqrt(pass number) on every pass
    return:
        Ws - list of weight vectors: the initial zero vector followed by the
             weights after every update
    """
    n_samples = X.shape[0]
    W = np.zeros(X.shape[1])
    Ws = [W]
    # Features and label share one array so a single shuffle keeps them aligned.
    xy = np.c_[X.reshape(n_samples, -1), y.reshape(n_samples, 1)]
    for epoch in range(max_iter):
        s = step / (np.sqrt(epoch + 1))
        np.random.shuffle(xy)
        for start in range(0, n_samples, batch_size):
            batch = xy[start:start + batch_size]
            X_batch, y_batch = batch[:, :-1], batch[:, -1:].ravel()
            d = dcost(X_batch, y_batch, W)
            if np.linalg.norm(d) <= tol:
                break
            W = W - s * d
            Ws.append(W)
    return Ws

In [19]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionMBGD(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.70865362 -0.53939208  4.15496557]


In [20]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

ax.plot(np.arange(len(Ws)), costs)

label_color = '#264653'
# Ws comes from logisticRegressionMBGD, so the title names the mini-batch method
# (the previous title said "batch gradient descent").
ax.set_title('对数几率回归-小批量梯度下降法', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>

随机平均梯度下降法（SAG）:

In [21]:
import numpy as np

def logisticRegressionSAG(X, y, max_iter=100, tol=1e-4, step=1e-1):
    """
    Logistic regression fitted with the stochastic average gradient method.
    args:
        X - training data set
        y - target labels
        max_iter - number of passes (each pass draws X.shape[0] random samples)
        tol - the fit stops once the norm of the averaged gradient is at or below this value
        step - base step length, divided by sqrt(pass number) on every pass
    return:
        Ws - list of weight vectors: the initial zero vector, the weights after
             every completed pass and, when the tolerance is reached in the
             middle of a pass, the weights at that moment
    """
    n_samples = X.shape[0]
    W = np.zeros(X.shape[1])
    Ws = [W]
    # Running sum of the most recent gradient stored for every sample
    p = np.zeros(X.shape[1])
    # Most recent gradient seen for every sample (zero until first visited)
    d_prev = np.zeros(X.shape)
    for epoch in range(max_iter):
        s = step / (np.sqrt(epoch + 1))
        for _ in range(n_samples):
            i = np.random.randint(0, n_samples)
            d = dcost(X[i], y[i], W)
            # Swap the stored gradient of sample i for the fresh one
            p = p - d_prev[i] + d
            d_prev[i] = d
            p_avg = p / n_samples
            if np.linalg.norm(p_avg) <= tol:
                # Record the weights reached inside this pass; otherwise callers
                # reading Ws[-1] would get the stale start-of-pass weights.
                if W is not Ws[-1]:
                    Ws.append(W)
                return Ws
            W = W - s * p_avg
        Ws.append(W)
    return Ws

In [22]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionSAG(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.70518166 -0.52839515  4.1631028 ]


In [23]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

# String x values give one categorical tick per recorded iterate.
# The alias np.str was removed in NumPy 1.24; the builtin str is equivalent.
ax.plot(np.arange(0, len(Ws), 1).astype(str), costs)

label_color = '#264653'
ax.set_title('对数几率回归-随机平均梯度下降法', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>

方差缩减随机梯度下降法（SVRG）:

In [24]:
import numpy as np

def logisticRegressionSVRG(X, y, max_iter=100, m = 100, tol=1e-4, step=1e-1):
    """
    Logistic regression fitted with stochastic variance reduced gradient descent.
    args:
        X - training data set (one sample per row)
        y - target labels
        max_iter - number of outer iterations
        m - number of stochastic inner updates per outer iteration
        tol - an inner-loop direction norm at or below this value ends the
              current inner loop early
        step - base step length, divided by sqrt(outer iteration number)
    return:
        Ws - list of weight vectors: the initial zero vector followed by the
             weights after every outer iteration
    """
    n_samples = X.shape[0]
    W = np.zeros(X.shape[1])
    Ws = [W]
    for epoch in range(max_iter):
        s = step / (np.sqrt(epoch + 1))
        # Mean gradient at the snapshot W.  dcost already sums the per-sample
        # gradients over the rows of X, so one vectorised call replaces the
        # per-sample Python loop.
        g = dcost(X, y, W) / n_samples
        w = W
        for _ in range(m):
            i = np.random.randint(0, n_samples)
            d_w = dcost(X[i], y[i], w)
            d_W = dcost(X[i], y[i], W)
            # Sample gradient at w, corrected by the same sample's gradient at
            # the snapshot and the snapshot's mean gradient
            d = d_w - d_W + g
            if np.linalg.norm(d) <= tol:
                break
            w = w - s * d
        W = w
        Ws.append(W)
    return Ws

In [25]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionSVRG(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.63768803 -0.48112644  3.98975306]


In [26]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

ax.plot(np.arange(len(Ws)), costs)

label_color = '#264653'
ax.set_title('对数几率回归-方差缩减随机梯度下降法', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>

SAGA法:

In [27]:
import numpy as np

def logisticRegressionSAGA(X, y, max_iter=100, tol=1e-4, step=1e-1):
    """
    Logistic regression fitted with the SAGA method.
    args:
        X - training data set
        y - target labels
        max_iter - number of passes (each pass draws up to X.shape[0] random samples)
        tol - a direction norm at or below this value ends the current pass
              early; the remaining passes still run
        step - base step length, divided by sqrt(pass number) on every pass
    return:
        Ws - list of weight vectors: the initial zero vector followed by the
             weights after every pass
    """
    n_samples = X.shape[0]
    W = np.zeros(X.shape[1])
    Ws = [W]
    # Table of the most recent gradient of every sample, initialised at the starting W
    d_prev = np.zeros(X.shape)
    for i in range(n_samples):
        d_prev[i] = dcost(X[i], y[i], W)
    for epoch in range(max_iter):
        s = step / (np.sqrt(epoch + 1))
        for _ in range(n_samples):
            i = np.random.randint(0, n_samples)
            d = dcost(X[i], y[i], W)
            # The table mean is taken before the entry of sample i is refreshed
            p = d - d_prev[i] + np.mean(d_prev, axis=0)
            d_prev[i] = d
            if np.linalg.norm(p) <= tol:
                break
            W = W - s * p
        # Record the weights after every pass.  The previous `for ... else: continue`
        # skipped this line for every pass that ran to completion, so the history
        # only contained passes that happened to end early.
        Ws.append(W)
    return Ws

In [28]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionSAGA(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.70503029 -0.52839461  4.16421806]


In [29]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

ax.plot(np.arange(len(Ws)), costs)

label_color = '#264653'
ax.set_title('对数几率回归-SAGA', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>

DFP法（Davidon-Fletcher-Powell）:

In [30]:
import numpy as np

def logisticRegressionDPF(X, y, max_iter=100, tol=1e-4):
    """
    Logistic regression fitted with a quasi-Newton method that keeps an
    approximation D_k of the inverse Hessian and refreshes it with two rank-one
    terms per iteration (the DFP update).
    args:
        X - training data set
        y - target labels
        max_iter - maximum number of iterations
        tol - tolerance on the norm of the gradient
    return:
        Ws - list of weight vectors: the initial zero vector followed by the
             weights after every iteration
    """
    dim = X.shape[1]
    W = np.zeros(dim)
    Ws = [W]
    # The inverse-Hessian approximation starts as the identity
    D_k = np.eye(dim)
    g_k = dcost(X, y, W)
    for _ in range(max_iter):
        d_k = -D_k.dot(g_k)
        # Step along d_k, with the length chosen by the line search on [0, 10]
        s_k = lineSearch(X, y, W, d_k, 0, 10) * d_k
        W = W + s_k
        Ws.append(W)
        g_next = dcost(X, y, W)
        if np.linalg.norm(g_next) <= tol:
            break
        # Column vectors: change of the gradient and change of the weights
        y_col = (g_next - g_k).reshape(-1, 1)
        s_col = s_k.reshape(-1, 1)
        step_term = s_col.dot(s_col.T) / s_col.T.dot(y_col)
        gradient_term = D_k.dot(y_col).dot(y_col.T).dot(D_k) / y_col.T.dot(D_k).dot(y_col)
        D_k = D_k + step_term - gradient_term
        g_k = g_next
    return Ws

In [31]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionDPF(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.70189113 -0.52627105  4.15584194]


In [32]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

ax.plot(np.arange(len(Ws)), costs)

label_color = '#264653'
# The method is DFP (Davidon-Fletcher-Powell); the previous title spelled it "DPF".
ax.set_title('对数几率回归-DFP', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>

BFGS法:

In [33]:
import numpy as np

def logisticRegressionBFGS(X, y, max_iter=100, tol=1e-4):
    """
    Logistic regression fitted with the BFGS quasi-Newton method, keeping an
    approximation D_k of the inverse Hessian.
    args:
        X - training data set
        y - target labels
        max_iter - maximum number of iterations
        tol - tolerance on the norm of the gradient
    return:
        Ws - list of weight vectors: the initial zero vector followed by the
             weights after every iteration
    """
    dim = X.shape[1]
    identity = np.eye(dim)
    W = np.zeros(dim)
    Ws = [W]
    # The inverse-Hessian approximation starts as the identity
    D_k = np.eye(dim)
    g_k = dcost(X, y, W)
    for _ in range(max_iter):
        d_k = -D_k.dot(g_k)
        # Step along d_k, with the length chosen by the line search on [0, 10]
        s_k = lineSearch(X, y, W, d_k, 0, 10) * d_k
        W = W + s_k
        Ws.append(W)
        g_next = dcost(X, y, W)
        if np.linalg.norm(g_next) <= tol:
            break
        # Column vectors: change of the gradient and change of the weights
        y_col = (g_next - g_k).reshape(-1, 1)
        s_col = s_k.reshape(-1, 1)
        a = s_col.dot(y_col.T)
        b = y_col.T.dot(s_col)
        c = s_col.dot(s_col.T)
        left = identity - a / b
        right = identity - a.T / b
        D_k = left.dot(D_k).dot(right) + c / b
        g_k = g_next
    return Ws

In [34]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionBFGS(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.70188875 -0.52626868  4.15583888]


In [35]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

# String x values give one categorical tick per recorded iterate.
# The alias np.str was removed in NumPy 1.24; the builtin str is equivalent.
ax.plot(np.arange(0, len(Ws), 1).astype(str), costs)

label_color = '#264653'
ax.set_title('对数几率回归-BFGS', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>

L-BFGS法:

In [36]:
import numpy as np

def calcDirection(ss, ys, rhos, g_k, m, k):
    """
    L-BFGS two-loop recursion: search direction from the stored update pairs.
    args:
        ss - list of weight-change column vectors, oldest first
        ys - list of gradient-change column vectors, oldest first
        rhos - list of 1 / (y^T s) values stored as 1x1 arrays
        g_k - current gradient (1-D)
        m - maximum number of most recent pairs to use
        k - number of pairs stored so far
    return:
        search direction (1-D); equal to -g_k when no pairs are stored
    """
    # Only the most recent min(k, m) pairs are used; `offset` indexes the oldest of them.
    window = min(k, m)
    offset = k - window
    q = g_k.reshape(-1, 1)
    alphas = np.zeros(window)
    # First loop: newest pair to oldest
    for i in range(window - 1, -1, -1):
        j = i + offset
        # .item() extracts the scalar from the 1x1 product; assigning a 1x1 array
        # into a scalar slot is deprecated since NumPy 1.25.
        alpha = rhos[j].dot(ss[j].T).dot(q).item()
        alphas[i] = alpha
        q = q - alpha * ys[j]
    # The initial inverse-Hessian approximation is the identity, so r starts as q
    r = q
    # Second loop: oldest pair to newest
    for i in range(window):
        j = i + offset
        beta = rhos[j].dot(ys[j].T).dot(r).item()
        r = r + (alphas[i] - beta) * ss[j]
    return -r.ravel()

def logisticRegressionLBFGS(X, y, m=100, max_iter=100, tol=1e-4):
    """
    Logistic regression fitted with the L-BFGS method.
    args:
        X - training data set
        y - target labels
        m - number of most recent update pairs used for the search direction
        max_iter - maximum number of iterations
        tol - tolerance on the norm of the gradient
    return:
        Ws - list of weight vectors: the initial zero vector followed by the
             weights after every iteration
    """
    W = np.zeros(X.shape[1])
    Ws = [W]
    g_k = dcost(X, y, W)
    # History of weight changes, gradient changes and 1 / (y^T s), oldest first
    ss, ys, rhos = [], [], []
    for k in range(max_iter):
        # One pair is stored per completed iteration, so k pairs exist here
        d_k = calcDirection(ss, ys, rhos, g_k, m, k)
        # Step along d_k, with the length chosen by the line search on [0, 1]
        s_k = lineSearch(X, y, W, d_k, 0, 1) * d_k
        W = W + s_k
        Ws.append(W)
        g_next = dcost(X, y, W)
        if np.linalg.norm(g_next) <= tol:
            break
        y_col = (g_next - g_k).reshape(-1, 1)
        s_col = s_k.reshape(-1, 1)
        ss.append(s_col)
        ys.append(y_col)
        rhos.append(1 / (y_col.T.dot(s_col)))
        g_k = g_next
    return Ws

In [37]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionLBFGS(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.70189123 -0.52627044  4.15584229]


In [38]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

ax.plot(np.arange(len(Ws)), costs)

label_color = '#264653'
ax.set_title('对数几率回归-L-BFGS', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>

牛顿共轭梯度法（Newton-CG）:

In [39]:
import numpy as np

def cg(H, g, max_iter=100, tol=1e-4):
    """
    Conjugate gradient method for the linear system H * deltaW = g.
    args:
        H - coefficient matrix
        g - right-hand side (1-D)
        max_iter - maximum number of iterations
        tol - relative tolerance: iteration stops once the squared residual norm
              is at most tol^2 times its initial value
    return:
        deltaW - approximate solution (the zero vector when g is all zeros)
    """
    deltaW = np.zeros(g.shape[0])
    # The starting point is zero, so the initial residual is g itself
    r = g
    d = r
    delta = np.dot(r, r)
    delta_0 = delta
    if delta_0 == 0:
        # A zero right-hand side is solved by the zero vector; iterating would
        # evaluate 0 / 0 for the first step length.
        return deltaW
    for _ in range(max_iter):
        q = H.dot(d)
        alpha = delta / (np.dot(d, q))
        deltaW = deltaW + alpha * d
        r = r - alpha * q
        delta_prev = delta
        delta = np.dot(r, r)
        if delta <= tol * tol * delta_0:
            break
        beta = delta / delta_prev
        d = r + beta * d
    return deltaW

def logisticRegressionNewtonCG(X, y, max_iter=100, tol=1e-4):
    """
    Logistic regression fitted with the Newton conjugate gradient method.
    args:
        X - training data set
        y - target labels
        max_iter - maximum number of iterations
        tol - tolerance on the norm of the gradient
    return:
        Ws - list of weight vectors: the initial zero vector followed by the
             weights after every iteration
    """
    # Start from the zero vector
    W = np.zeros(X.shape[1])
    Ws = [W]
    for _ in range(max_iter):
        gradient = dcost(X, y, W)
        # Stop once the gradient is small enough
        if np.linalg.norm(gradient) <= tol:
            break
        hessian = ddcost(X, y, W)
        # Solve hessian * deltaW = gradient with conjugate gradients and step by -deltaW
        W = W - cg(hessian, gradient)
        Ws.append(W)
    return Ws

In [40]:
import numpy as np

# Re-encode the two class labels in place as -1 / +1.
y_classes = np.unique(y)
y[y == y_classes[0]] = -1
y[y == y_classes[1]] = 1
# Prepend a column of ones so the first weight plays the role of the intercept.
X_b = np.column_stack((np.ones(X.shape[0]), X))
Ws = logisticRegressionNewtonCG(X_b, y)
# The last recorded iterate is taken as the fitted weights.
W = Ws[-1]
print(W)

[ 0.70188971 -0.52627007  4.15584068]


In [41]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Pick a locally installed font that can render the Chinese labels.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Cost of every recorded iterate, in the order they were produced.
costs = [cost(X_b, y, weights) for weights in Ws]

ax.plot(np.arange(len(Ws)), costs)

label_color = '#264653'
ax.set_title('对数几率回归-牛顿共轭梯度法', color=label_color)
ax.set_xlabel('迭代次数', color=label_color)
ax.set_ylabel('代价函数', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.show()

<IPython.core.display.Javascript object>