# 逻辑回归

In [4]:
import os
import numpy as np
import matplotlib.pylab as plt
import pandas as pd
import scipy.optimize as opt
%matplotlib inline

In [5]:
def getData(path, names=None):
    return pd.read_csv(path, header=None, names=names)
    pass

In [6]:
data = getData(os.getcwd() + "/../data/ex2data1.txt",["A", "B", "Y"])
data.insert(0,'ones',1)
data['A'] = (data['A']-data['A'].min()) / (data['A'].max()-data['A'].min())
data['B'] = (data['B']-data['B'].min()) / (data['B'].max()-data['B'].min())
# 加入bias

开始实现逻辑回归， 因为在推导公式中，为了方便计算，类别是以，1,-1分类的， 因此需要将0换为-1.
损失函数:$E_{in}(W)=\sum\limits_{n=1}^{N}ln(1+exp(-y_nW^Tx_n))$

In [103]:
def filterData(data):
    '''构造输入和标签'''
    col = data.shape[1]
    x = data.iloc[:,0:col-1]
    y = data.iloc[:,col-1:col].replace(0, -1) 
    return np.array(x.values), np.array(y.values)

def getWLin(X, Y):
    '''线性回归计算初始值'''
    return np.linalg.inv(X.transpose().dot(X)).dot(X.transpose()).dot(Y)

def sigmod(s):
    '''sigmod函数'''
    return 1.0 / (1.0 + np.exp(-s))

def update(W, grad, ita):
    '''更新W'''
    return W - (ita * grad)

def MBGD(W, X, Y):
    '''批量梯度下降'''
    N = len(X)
    grad = np.zeros(X[0].shape)
    for i in range(N):
        sig = sigmod(-Y[i][0] * X[i].dot(W))
        grad += -Y[i][0] * sig * X[i]
    
    return grad / N

def SGD(W, X, Y, n):
    '''随机梯度'''
    sig = sigmod(-Y[n][0] * X[n].dot(W))
    return -Y[n][0] * sig * X[n]
    
    
def cost(W, X, Y):
    '''损失函数'''
    N = len(X)
    cost = 0.0
    for i in range(N):
        err = np.exp(-Y[i][0] * X[i].dot(W))
        cost += np.log(1 + err)
    
    return cost / N

def predict(W, X):
    '''简单预测'''
    predicts = [sigmod(x.dot(W)) for x in X]
    return [1 if x >= 0.5 else 0 for x in predicts]

def train_with_grad(X, Y, W, GD=1, iterations=30000):
    '''训练'''
    N = len(X)
    for i in range(iterations):
        if GD:
            grad = MBGD(W, X, Y) # 全梯度
        else:
            grad = SGD(W, X, Y, i%N) # 随机梯度
        err = cost(W, X, Y)
        W = update(W, grad, 0.1)
    return W, err
    
def train_with_fmin(W):
    '''使用scipy的fmin优化'''
    W = np.matrix(np.zeros(X[0].shape))
    result = opt.fmin_tnc(func=cost, x0=W,fprime=MBGD, args=(X,Y))
    return result
    

In [86]:
X, Y = filterData(data)
W = getWLin(X,Y).reshape(X[0].shape)
W_SGD = train_with_grad(X, Y, W, GD=0)

In [116]:
X, Y = filterData(data)
W = getWLin(X,Y).reshape(X[0].shape)
W_MBGD = train_with_grad(X, Y, W, GD=1)

In [88]:
W = getWLin(X,Y).reshape(X[0].shape)
W_fmin = train_with_fmin(W)

In [117]:
W_SGD, W_MBGD, W_fmin

((array([-11.07971084,  12.57405675,  12.07731836]), 0.20568539193828719),
 (array([-11.0468896 ,  12.47366075,  11.85883354]), 0.20547304306067637),
 (array([-12.81814377,  14.41152224,  13.77727802]), 36, 1))

In [90]:
cost(W_fmin[0], X, Y) # 计算fmin的误差

0.20349796604639114

可以看出误差已经很接近了，但是自己的优化函数的速度完全比不上使用第三方库，因此以后的优化问题均使用fmin。这个练习花了很长时间调正确，一个原因，不熟悉numpy的运算！