# 作业: 逻辑回归

## 作业要求：

用numpy实现Logistic Regression类（补充cell 1中定义的LogisticRegression类），用梯度下降法训练

1. 包括类的方法 loss = fit(X,Y) 函数，用于训练；输入：X为N * d维的训练数据，N为训练样本数，d为数据的维数；Y为N*1维的训练数据真实类别号；输出：loss为列表，包括每轮的损失函数值。

2. 包括类的方法 y_pred, y_pred_label = predict(X) 函数，用于测试；输入： X为N * d维的测试数据， N为测试样本数；输出：y_pred维数为N * 1，为模型的预测（回归）值；y_pred_label维数为N * 1，为根据回归值得到的预测类别号

In [117]:
import numpy as np

class LogisticRegression(object):
    """Binary logistic regression trained with full-batch gradient descent.

    The model is ``p = sigmoid(X @ W + b)`` with ``W`` a 1-D weight vector of
    length ``dim`` and ``b`` a scalar bias.  Labels may be passed either as a
    flat ``(N,)`` array or as an ``(N, 1)`` column; they are flattened
    internally so that they line up element-wise with the 1-D predictions
    instead of broadcasting to an ``(N, N)`` matrix.
    """

    def __init__(self, dim, learning_rate=0.01, max_iter=100, seed=None):
        """Create a model for ``dim``-dimensional inputs.

        Args:
            dim: Number of input features (length of the weight vector).
            learning_rate: Step size used by every gradient-descent update.
            max_iter: Number of full-batch updates performed by ``fit``.
            seed: Value passed to ``np.random.seed`` before the weights are
                drawn.  Note that this seeds NumPy's global generator.
        """
        np.random.seed(seed)
        self.lr = learning_rate
        self.max_iter = max_iter
        self.W = np.random.randn(dim)  # weights drawn from a standard normal
        self.b = 0.0                   # bias starts at zero

    def sigmoid(self, z):
        """Return ``1 / (1 + exp(-z))`` element-wise without overflowing.

        ``exp`` is only ever evaluated at ``-|z|``, which lies in ``(0, 1]``,
        so very negative inputs no longer overflow.  For ``z >= 0`` the
        expression is the textbook one; for ``z < 0`` the algebraically equal
        form ``exp(z) / (1 + exp(z))`` is used.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Hand back a NumPy scalar (not a 0-d array) for scalar input.
        return out[()] if out.ndim == 0 else out

    def loss_func(self, Y, Y_pred):
        """Return the mean binary cross-entropy between ``Y`` and ``Y_pred``.

        Both arguments are flattened first so an ``(N, 1)`` label column and
        an ``(N,)`` prediction vector are compared element by element.
        """
        Y = np.asarray(Y, dtype=float).reshape(-1)
        Y_pred = np.asarray(Y_pred, dtype=float).reshape(-1)
        eps = 1e-15  # keeps log() away from 0
        Y_pred = np.clip(Y_pred, eps, 1 - eps)
        return -np.mean(Y * np.log(Y_pred) + (1 - Y) * np.log(1 - Y_pred))

    def update(self, X, Y, Y_pred):
        """Apply one gradient-descent step to ``self.W`` and ``self.b``.

        Args:
            X: Training inputs, shape ``(N, d)``.
            Y: True labels, ``(N,)`` or ``(N, 1)``.
            Y_pred: Current predicted probabilities for ``X``.
        """
        X = np.asarray(X, dtype=float)
        m = X.shape[0]
        # Flatten both sides so the residual is a length-N vector.
        residual = (np.asarray(Y_pred, dtype=float).reshape(-1)
                    - np.asarray(Y, dtype=float).reshape(-1))
        dw = np.dot(X.T, residual) / m
        db = np.sum(residual) / m
        self.W -= self.lr * dw
        self.b -= self.lr * db

    def fit(self, X, Y):
        """Train on ``(X, Y)`` for ``max_iter`` full-batch iterations.

        Args:
            X: Training inputs, shape ``(N, d)``.
            Y: Class labels in {0, 1}, shape ``(N,)`` or ``(N, 1)``.

        Returns:
            A list with the loss measured before each update, one entry per
            iteration.

        Raises:
            ValueError: If ``X`` and ``Y`` hold different numbers of samples.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).reshape(-1)
        if X.shape[0] != Y.shape[0]:
            raise ValueError(
                "X has %d samples but Y has %d" % (X.shape[0], Y.shape[0])
            )

        loss_history = []
        for _ in range(self.max_iter):
            Y_pred = self.sigmoid(np.dot(X, self.W) + self.b)
            loss_history.append(self.loss_func(Y, Y_pred))
            self.update(X, Y, Y_pred)
        return loss_history

    def predict(self, X):
        """Return ``(probabilities, labels)`` for the rows of ``X``.

        The probabilities are ``sigmoid(X @ W + b)``; a label is 1 where the
        probability is at least 0.5 and 0 otherwise.  For an ``(N, d)`` input
        both outputs are 1-D arrays of length ``N``.
        """
        Z = np.dot(X, self.W) + self.b
        Y_pred = self.sigmoid(Z)
        Y_label = (Y_pred >= 0.5).astype(int)
        return Y_pred, Y_label

In [118]:
import matplotlib.pyplot as plt


def plotData(X, Y):
    """Open a new figure and plot the samples of both classes.

    Rows of ``X`` whose label equals 1 are drawn as red ``+`` markers and
    rows whose label equals 0 as blue circles, using the first two columns
    of ``X`` as the plot coordinates.  ``Y`` is indexed as ``Y[:, 0]``, so it
    has to be two-dimensional (an ``(m, 1)`` label column).
    """
    plt.figure()
    for label, marker in ((1, 'r+'), (0, 'bo')):
        # The comparison yields an (m, 1) boolean array; taking column 0
        # turns it into a length-m mask that can index a single axis of X.
        mask = (Y == label)[:, 0]
        plt.plot(X[mask, 0], X[mask, 1], marker)
    
def plotDecisioinBoundary(X, Y, model=None):
    """Plot the data together with the classifier's 0.5 probability contour.

    The samples are drawn with ``plotData``; the classifier is then evaluated
    on a 50 x 50 grid spanning the range of the first two columns of ``X``,
    and the contour line where the predicted value equals 0.5 is added to the
    same figure.

    Args:
        X: Sample matrix; only columns 0 and 1 are used.
        Y: Labels, forwarded unchanged to ``plotData``.
        model: Object whose ``predict(points)`` returns a
            ``(values, labels)`` pair.  When omitted, the notebook-level
            variable ``regr`` is used, which keeps existing two-argument
            calls working exactly as before.
    """
    # Resolve the classifier first so a missing `regr` fails before plotting.
    clf = regr if model is None else model

    plotData(X, Y)

    plot_num = 50
    x_plot = np.linspace(start=X[:, 0].min(), stop=X[:, 0].max(), num=plot_num)
    y_plot = np.linspace(start=X[:, 1].min(), stop=X[:, 1].max(), num=plot_num)
    X_plot, Y_plot = np.meshgrid(x_plot, y_plot)

    # One grid point per row: (x, y) pairs in the meshgrid's row-major order.
    X_array = np.column_stack((X_plot.ravel(), Y_plot.ravel()))

    p_array, _ = clf.predict(X_array)
    # Back to the grid layout so the values line up with X_plot / Y_plot.
    P_matrix = np.asarray(p_array).reshape((plot_num, plot_num))

    plt.contour(X_plot, Y_plot, P_matrix, np.array([0.5]))
    
def test(y_pred, y_true):
    """Return the fraction of positions where prediction and truth agree.

    Entries are compared one index at a time along the first axis, for as
    many entries as ``y_pred`` has; the number of matches is divided by that
    same count.
    """
    total = y_pred.shape[0]
    # Start from 0.0 so the tally is a float even when nothing matches.
    hits = sum((1 for j in range(total) if y_true[j] == y_pred[j]), 0.0)
    return hits / total