In [133]:
import numpy as np
import time
import copy
from __future__ import division
import traceback

# with dense vector negative sample 0, postive sample 1
def performance(f):
    def fn(*args, **kw):
        t_start = time.time()
        r = f(*args, **kw)
        t_end = time.time()
        print ('call %s() in %fs' % (f.__name__, (t_end - t_start)))
        return r
    return fn


# U is 2d-array with 2m*d dimension for each person item tuple of (x, label)

def mlr(W, U, x):
    """
    calculate mixture logistic regression
    :param U: m * d
    :param W: m * d
    :param x: d
    :return:
    """
    ux = np.dot(U, x)
    eux = softmax(ux)
    del ux
    return np.dot(eux, sigmoid(np.dot(W, x)))

def sigmoid(z):
    """
    calculate sigmoid
    :param z:
    :return:
    """
    return 1 / (1 + np.exp(-z))

def softmax(x):
    """
    softmax a array
    :param x:
    :return:
    """
    e_x = np.exp(x)
    return e_x / e_x.sum()

def calLoss(X, y, weight_W, weight_U, norm21, norm1):
    """
    :param data:
    :param weight_W:
    :param weight_U:
    :return:
    """
    functionLoss = calFunctionLoss(weight_W, weight_U, X, y)
    norm21Loss = calNorm21(weight_W + weight_U)
    norm1Loss = calNorm1(weight_W + weight_U)
    print(functionLoss , norm21 * norm21Loss , norm1 * norm1Loss)
    return functionLoss + norm21 * norm21Loss + norm1 * norm1Loss

def calFunctionLossOne(W_w, W_u,x, y):
    p = mlr(W_w, W_u, x)
    if y == 0:
        return - np.log(1 - p)
    else:
        return - np.log(p)


def calFunctionLoss(W_w, W_u, X, y):
    """
    calculate the loss over all data
    :param w_w:
    :param w_u:
    :param data:
    :return:
    """
    loss = map(lambda (x,y): calFunctionLossOne(W_w,W_u,x, y), zip(X, y))
    loss = sum(loss)
    return loss
    # print("loss is:  %s" % loss)

def calNorm21(weight):
    '''
        计算norm21
    :param weight:
    :return:
    '''
    return (weight ** 2).sum() ** 0.5

def calNorm1(weight):
    """
        计算norm1
    :param weight:
    :return:
    """
    return np.abs(weight).sum()

def calDimension21(W):
    """
        计算每一个维度的L2
    :param W:
    :return:{dimension1:std1, dimension2:std2 ......}
    """
    return (W**2).sum(axis = 0) ** 0.5

# derivative for each sample
def cal_derivative(W_w, W_u, x, y):
    """
    calculate derivative
    :param weight:
    :return:
    """
    ux = np.dot(W_u, x)
    eux = softmax(ux)
    del ux
    sig = sigmoid(np.dot(W_w, x))
    mlr = np.dot(eux, sig)
    prob_scalar =  - (y - mlr) / (mlr * (1 - mlr))
    dir_U = np.outer(prob_scalar * eux * (sig - mlr), x)
    dir_W = np.outer(prob_scalar * sig * (1 - sig) * eux, x)
    return dir_W, dir_U


def sumCalDerivative(WW, WU, X, y):
    all = map(lambda (x,y): cal_derivative(WW, WU,x, y), zip(X,y))
    LW, LU = reduce(lambda x, y: (x[0] + y[0], x[1] + y[1]),all,(0,0))
    return LW, LU




def virtualGradient(WW, WU, GW, GU,beta,lamb):
    """
    :param weight_W:
    :param weight_U:
    :param gradient_W:
    :param gradient_U:
    :param norm21:
    :param norm1:
    :return:
    """
    #计算θ_i·
    D21 = calDimension21(WW + WU)
    #计算v：
    VW = calV(GW, beta)
    VU = calV(GU, beta)
    #计算v_i·
    VD21 = calDimension21(VW + VU)
    sumVD21 = sum(VD21)
    #计算d_ij
    DW = calDij(GW, WW, VW, D21, sumVD21, beta, lamb)
    DU = calDij(GU, WU, VU, D21, sumVD21, beta, lamb)
    return DW, DU


def calV(L, beta):
    """
    :param LW:
    :param LU:
    :param beta:
    :param lamb:
    :return:
    """
    V = np.copy(L)
    V = np.maximum(np.abs(V) - beta, 0)
    return V*np.sign(-L)

def calDij(L, W, V, D21, sumVD21, beta, lamb):
    mask1 = (W != 0)
    mask2 = (W == 0) * np.tile((D21 != 0), (len(W),1))
    mask3 = np.tile((D21 == 0), (len(W),1))
    D21_tmp = np.copy(D21)
    D21_tmp[D21_tmp == 0] = 1
    s = - L - lamb * W / D21_tmp
    cond1 =  s - beta * np.sign(W)
    cond2 = np.maximum(np.abs(s) - beta, 0.0)*np.sign(s)
    if (sumVD21 != 0 ):
        cond3 = V * (max(sumVD21 - lamb, 0.0) / sumVD21)
    else:
        cond3 = V * max(sumVD21 - lamb, 0.0)
    return mask1 * cond1 + mask2 * cond2 + mask3 * cond3


def loop(length, latest, direction):
    count = 0
    if(direction == 'right'):
        while(count < length):
            if(latest + count + 1 < length):
                yield latest + count + 1
                count += 1
            else:
                yield latest + count + 1 - length
                count += 1
    elif(direction == 'left'):
        while(count < length):
            if(latest - count >= 0):
                yield latest - count
                count += 1
            else:
                yield length + latest - count
                count += 1
    else:
        raise Exception("please enter left or right")


def adam(VW, VU,m_w, m_u, v_w, v_u, beta1, beta2, it, alpha, epison):
    m_w = beta1 * m_w - (1 - beta1) * VW
    m_u = beta1 * m_u - (1 - beta1) * VU
    v_w = beta2 * v_w + (1 - beta2) * (VW**2)
    v_u = beta2 * v_u + (1 - beta2) * (VU**2)
    m_w_hat = -m_w / (1 - beta1 ** it)
    m_u_hat = -m_u / (1 - beta1 ** it)
    mask_w = np.sign(m_w_hat) * np.sign(VW) > 0
    mask_u = np.sign(m_u_hat) * np.sign(VU) > 0
    m_w_hat = m_w_hat * mask_w
    m_u_hat = m_u_hat * mask_u
    v_w_hat = v_w / (1 - beta2**it)
    v_u_hat = v_u / (1 - beta2**it)
    return m_w, m_u, v_w, v_u, alpha * (m_w_hat / (v_w_hat ** 0.5 + epison)), alpha * (m_u_hat / (v_u_hat  ** 0.5 + epison))


## weight_w, weight_u, s
def lbfgs(VW, VU, sList_w,sList_u, yList_w, yList_u, k, m, start):
    """
    :param feaNum:
    :param gk : matrix, 2m*d
    :param sList:3d*matrix,steps * 2m * d
    :param yList:3d*matrix, steps * 2m * d
    :return:
    """
    if((sList_w[start] * yList_w[start] + sList_u[start] * yList_u[start]).sum() > 0 ):
        q_u = np.copy(VU)
        q_w = np.copy(VW)
        # for delta
        L = k + 1 if k < m else m
        alphaList = np.zeros(L)
        ro = (yList_w[(start - 1) % m] * sList_w[(start - 1) % m] + yList_u[(start - 1) % m] * sList_u[(start - 1) % m]).sum()
        print ("ro %f" % ro)

        for i in loop(L, start, 'left'):
            alpha = (sList_u[i] * q_u + sList_w[i] * q_w).sum() / (sList_u[i] * yList_u[i] + sList_w[i] * yList_u[i]).sum()
            print ("alpha %f" % alpha)
            if(alpha == np.nan or alpha == np.inf or alpha == -np.inf):
                return VW, VU
            q_u = q_u - alpha * yList_u[i]
            q_w = q_w - alpha * yList_w[i]
            alphaList[i] = alpha
        
        q_u = q_u * ro
        q_w = q_w * ro

        for i in loop(L,start,'right'):
            beta = (yList_u[i] * q_u + yList_w[i] * q_w).sum() / (sList_u[i] * yList_u[i] + sList_w[i] * yList_u[i]).sum()
            q_u = q_u + (alphaList[i] - beta) * sList_u[i]
            q_w = q_w + (alphaList[i] - beta) * sList_w[i]

        mask_u = np.sign(q_u) * np.sign(VU) > 0
        mask_w = np.sign(q_w) * np.sign(VW) > 0

        return q_w * mask_w, q_u * mask_u
    else:
        return VW, VU

def backTrackingLineSearch(X, y, weight_W, weight_U,norm21, norm1, pW, pU):
    """
    :param it:
    :param oldLoss:
    :param data:
    :param WW:
    :param WU:
    :param GW:
    :param GU:
    :param vGW:
    :param vGU:
    :return:
    """
    alpha = 1.0
    c = 0.5
    tao = 0.9
    LW, LU = sumCalDerivative(weight_W, weight_U, X, y)
    m = (pW * LW + pU * LU).sum()
    t = - c * m
    loss = calLoss(X, y, weight_W, weight_U, norm21, norm1)

    while True:
        newW = weight_W - alpha*pW
        newU = weight_U - alpha*pU

        new_loss = calLoss(X, y, newW, newU, norm21, norm1)

        if(loss > new_loss + alpha * t):
            return newW, newU
        else:
            alpha = tao * alpha

def fixOrthant(GW, weight_W, new_weight_W):
    mask = (weight_W == 0) * np.sign(GW) + (weight_W != 0) * np.sign(weight_W)
    mask = mask * new_weight_W > 0
    return new_weight_W * mask


In [141]:
import numpy as np
import copy
import pickle
import time
from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_curve,roc_auc_score
import random
import logging

class LSPLM:

    def __init__(self,
                 pieceNum = 12,
                 iterNum = 1000,
                 intercept = True,
                 beta1 = 0.1,
                 beta2 = 0.1,
                 alpha = 0.001,
                 epison = 10e-8,
                 lamb = 0.1,
                 beta = 0.1,
                 terminate = False
                 ):
        """
        :param feaNum:  特征数
        :param classNum:    类别数
        :param iterNum:
        :param intercept:
        :param memoryNum:
        :param beta:
        :param lamb:
        :param u_stdev:
        :param w_stdev:
        """
        self.pieceNum = pieceNum
        self.iterNum = iterNum
        self.intercept = intercept
        self.beta = beta
        self.lamb = lamb
        self.beta1 = beta1
        self.beta2 = beta2
        self.alpha = alpha
        self.N = 0
        self.p = 0
        self.terminate = terminate
        self.epison = epison

    def fit(self,X, y):
        """
            训练ls-plm large scale piece-wise linear model
        :param data:
        :return:
        """

        # np.random.seed(0)
        N, p = X.shape
        self.N = N
        if self.intercept:
            self.p = p + 1
            pad = np.ones((N, p + 1))
            pad[:,:-1] = X
            X = pad
            del pad
        else:
            self.p = p

        ## Intialization
        np.random.seed(0)
        weight_W = np.random.normal(0,0.1, (self.pieceNum, self.p))
        weight_U = np.random.normal(0,0.1, (self.pieceNum, self.p))
        best_weight_W = np.random.normal(0,0.1, (self.pieceNum, self.p))
        best_weight_U = np.random.normal(0,0.1, (self.pieceNum, self.p))
        m_w = np.zeros((self.pieceNum, self.p))
        m_u = np.zeros((self.pieceNum, self.p))
        v_w = np.zeros((self.pieceNum, self.p))
        v_u = np.zeros((self.pieceNum, self.p))
        loss_before = calLoss(X, y, weight_W, weight_U, self.lamb, self.beta)
        GW, GU = sumCalDerivative(weight_W, weight_U, X, y)
        LW, LU = virtualGradient(weight_W, weight_U, GW, GU,self.beta,self.lamb)
        it = 1
        del GW,GU
        loss_best = np.maximum
        best_iter = 0
        optimize_counter = 0
        while it < self.iterNum:
            print "iter:%d, loss:%s" % (it, loss_before)
            start_time = time.time()
            
            GW, GU = sumCalDerivative(weight_W, weight_U, X, y)
            newLW, newLU = virtualGradient(weight_W, weight_U, GW, GU,self.beta,self.lamb)
            del GW,GU


            m_w, m_u, v_w, v_u, PW, PU = adam(newLW, newLU, m_w, m_u, v_w, v_u, self.beta1, self.beta2, it, self.alpha, self.epison)

            new_weight_W, new_weight_U = weight_W + PW, weight_U + PU

            del PW, PU

            new_weight_W = fixOrthant(newLW, weight_W, new_weight_W)
            new_weight_U = fixOrthant(newLU, weight_U, new_weight_U)
            loss_now = calLoss(X, y, new_weight_W, new_weight_U, self.lamb, self.beta)
            if(loss_now < loss_best):
                loss_best = loss_now
                best_iter = it
                best_weight_W = new_weight_W
                best_weight_U = new_weight_U
            if(loss_before < loss_now):
                optimize_counter += 1
                if(optimize_counter >= 10):
                    self.weight_U = best_weight_U
                    self.weight_W = best_weight_W
                    print("use time: ", time.time() - start_time)
                    print("The best result get at %d iteration with loss: %f" % (best_iter, loss_best))
                    return "Done!"
            else:
                optimize_counter = 0
            

            weight_U = new_weight_U
            weight_W = new_weight_W
            LW = newLW
            LU = newLU
            del newLW
            del newLU
            del new_weight_U
            del new_weight_W
            loss_before = loss_now
            

            it += 1


        logging.info("============iterator : %s end ==========" % it)
        print("")

        print("The best result get at %d iteration with loss: %f" % (best_iter, loss_best))
        print("Done!")
        self.weight_W = best_weight_W
        self.weight_U = best_weight_U

    def predict_proba(self, X):
        N, p = X.shape
        if self.intercept:
            pad = np.ones((N, p + 1))
            pad[:,:-1] = X
            X = pad
            del pad
        return np.array(map(lambda x: mlr(self.weight_W,self.weight_U,x),X))


    def predict(self, X):
        return np.array(self.predict_proba(X) > 0.5, dtype = int)

In [4]:
# test
X1 = np.random.normal(1,0.2, (100, 10))
X2 = np.random.normal(-1,0.2, (100,10))
y = np.zeros(200)
for i in range(0,100):
    y[i] = 1
ls = LSPLM(iterNum=1000,lamb = 0.1,beta = 0.1,pieceNum=1)
ls.fit(X,Y)
y_test = ls.predict_proba(X)

(200, 10)

(79.444532195606982, 0.056534021568401704, 0.15686732448500956)
iter:1, loss:79.6579335417
(78.797371346601707, 0.056423805812347919, 0.15646732502015478)
iter:2, loss:79.0102624774
(78.15458057639907, 0.056316216507825728, 0.15606733501039685)
iter:3, loss:78.3669641279
(77.516143492957397, 0.056211268193387988, 0.15566734994992495)
iter:4, loss:77.7280221111
(76.882043974494493, 0.056108975406856144, 0.15526736741919608)
iter:5, loss:77.0934203173
(76.25226604618878, 0.056009352630481506, 0.15486738690601609)
iter:6, loss:76.4631427857
(75.626793674135598, 0.055912414134031065, 0.15446740851657847)
iter:7, loss:75.8371734968
(75.005610705601754, 0.05581817391704872, 0.154067432540577)
iter:8, loss:75.2154963121
(74.388700859155065, 0.055726645689838109, 0.15366745936829679)
iter:9, loss:74.5980949642
(73.776047725279, 0.055637842863764664, 0.15326748949685581)
iter:10, loss:73.9849530576
(73.167634768774931, 0.055551778543902197, 0.15286752356259981)
iter:11, loss:73.3760540709
(72.6

array([  9.99574259e-01,   9.99276857e-01,   9.99477174e-01,
         9.99287927e-01,   9.99119298e-01,   9.98868980e-01,
         9.97196687e-01,   9.98844081e-01,   9.98802928e-01,
         9.99492602e-01,   9.98618967e-01,   9.99131605e-01,
         9.97375634e-01,   9.98383896e-01,   9.98421555e-01,
         9.99567532e-01,   9.99346989e-01,   9.98496243e-01,
         9.99386654e-01,   9.99032303e-01,   9.99119541e-01,
         9.98163729e-01,   9.99260346e-01,   9.99344587e-01,
         9.98531176e-01,   9.99725458e-01,   9.99237336e-01,
         9.99115399e-01,   9.99472103e-01,   9.98904084e-01,
         9.98595914e-01,   9.96430742e-01,   9.99182949e-01,
         9.98983665e-01,   9.99371488e-01,   9.98752404e-01,
         9.99350143e-01,   9.98014974e-01,   9.98896762e-01,
         9.98957143e-01,   9.99582789e-01,   9.99592088e-01,
         9.98648468e-01,   9.99624666e-01,   9.99256151e-01,
         9.98842785e-01,   9.99515645e-01,   9.99169240e-01,
         9.98338005e-01,

In [131]:
predict = np.vectorize(mlr,excluded=['W','U'])
pad = np.ones((200, 10 + 1))
pad[:,:-1] = X
X2 = pad

In [132]:
predict(W = ls.weight_W, U = ls.weight_U, x = X2)


ValueError: shapes (1,11) and (1,11) not aligned: 11 (dim 1) != 1 (dim 0)

In [65]:
test()

UnboundLocalError: local variable 'a' referenced before assignment

In [98]:
y = ls.predict_proba(X)

In [104]:
np.array(y) > 0.5

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,