In [2]:
import numpy as np
import copy
import pickle
import time
from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_curve,roc_auc_score
import random
import logging
from function import *

class LSPLM:
    """Large-scale piece-wise linear model (LS-PLM) binary classifier.

    Two weight matrices of shape ``(pieceNum, p)`` -- ``weight_W`` and
    ``weight_U`` -- are trained by repeated Adam steps followed by an
    orthant projection.  The numerical helpers ``calLoss``,
    ``sumCalDerivative``, ``virtualGradient``, ``adam``, ``fixOrthant`` and
    ``mlr_total`` are not defined in this class; they must be available as
    module-level names (presumably supplied by ``from function import *`` --
    confirm).
    """

    def __init__(self,
                 pieceNum = 12,
                 iterNum = 1000,
                 intercept = True,
                 beta1 = 0.1,
                 beta2 = 0.1,
                 alpha = 0.001,
                 epison = 10e-8,
                 lamb = 0.1,
                 beta = 0.1,
                 terminate = False
                 ):
        """
        :param pieceNum:  number of pieces; each weight matrix has this many rows
        :param iterNum:   iteration bound; the training counter starts at 1 and the
                          loop stops before reaching this value (iterNum - 1 steps)
        :param intercept: if True, a constant column of ones is appended to X
        :param beta1:     forwarded to ``adam`` (presumably first-moment decay -- confirm)
        :param beta2:     forwarded to ``adam`` (presumably second-moment decay -- confirm)
        :param alpha:     forwarded to ``adam`` (presumably the step size -- confirm)
        :param epison:    forwarded to ``adam``; note the default ``10e-8`` equals 1e-7
        :param lamb:      forwarded to ``calLoss`` and ``virtualGradient``
                          (presumably a regularisation strength -- confirm)
        :param beta:      forwarded to ``calLoss`` and ``virtualGradient``
                          (presumably a regularisation strength -- confirm)
        :param terminate: stored on the instance; not read by any method of this class
        """
        self.pieceNum = pieceNum
        self.iterNum = iterNum
        self.intercept = intercept
        self.beta = beta
        self.lamb = lamb
        self.beta1 = beta1
        self.beta2 = beta2
        self.alpha = alpha
        self.N = 0
        self.p = 0
        self.terminate = terminate
        self.epison = epison

    def _add_intercept(self, X):
        """Return X with a trailing column of ones when ``self.intercept`` is set.

        X must expose a two-element ``shape``.  When no intercept is requested
        X is returned unchanged.
        """
        if not self.intercept:
            return X
        N, p = X.shape
        padded = np.ones((N, p + 1))
        padded[:, :-1] = X
        return padded

    def fit(self, X, y):
        """
        Train the LS-PLM (large scale piece-wise linear model).

        Stores the weights of the lowest-loss iterate in ``self.weight_W`` and
        ``self.weight_U``.  Training stops early once the loss has increased
        on 10 consecutive iterations.

        Side effect: reseeds NumPy's global random generator with 0.

        :param X: 2-D array of samples (rows) by features (columns)
        :param y: targets, forwarded unchanged to the loss/gradient helpers
        :return: the string "Done!" when training stops early, otherwise None
        """
        N, p = X.shape
        self.N = N
        self.p = p + 1 if self.intercept else p
        X = self._add_intercept(X)

        ## Initialization
        np.random.seed(0)
        weight_W = np.random.normal(0, 0.1, (self.pieceNum, self.p))
        weight_U = np.random.normal(0, 0.1, (self.pieceNum, self.p))
        # Until an iterate is accepted, "best" is the starting point rather
        # than an unrelated random draw.
        best_weight_W = weight_W.copy()
        best_weight_U = weight_U.copy()
        # State arrays handed to adam() on every step.  adam() does not return
        # them, so presumably it updates them in place -- confirm.
        m_w = np.zeros((self.pieceNum, self.p))
        m_u = np.zeros((self.pieceNum, self.p))
        v_w = np.zeros((self.pieceNum, self.p))
        v_u = np.zeros((self.pieceNum, self.p))

        loss_before = calLoss(X, y, weight_W, weight_U, self.lamb, self.beta)
        # +inf sentinel: any finite loss compares lower, so the first iterate
        # is always recorded as the provisional best.
        loss_best = np.inf
        best_iter = 0
        optimize_counter = 0    # consecutive iterations whose loss went up
        it = 1
        while it < self.iterNum:
            print("iter:%d, loss:%s" % (it, loss_before))
            start_time = time.time()

            GW, GU = sumCalDerivative(weight_W, weight_U, X, y)
            # NOTE(review): virtualGradient receives (beta, lamb) while calLoss
            # receives (lamb, beta) -- confirm both match the helper signatures.
            newLW, newLU = virtualGradient(weight_W, weight_U, GW, GU, self.beta, self.lamb)

            PW, PU = adam(newLW, newLU, m_w, m_u, v_w, v_u,
                          self.beta1, self.beta2, it, self.alpha, self.epison)

            new_weight_W = fixOrthant(newLW, weight_W, weight_W + PW)
            new_weight_U = fixOrthant(newLU, weight_U, weight_U + PU)
            loss_now = calLoss(X, y, new_weight_W, new_weight_U, self.lamb, self.beta)

            if loss_now < loss_best:
                loss_best = loss_now
                best_iter = it
                best_weight_W = new_weight_W
                best_weight_U = new_weight_U

            if loss_before < loss_now:
                optimize_counter += 1
                if optimize_counter >= 10:
                    self.weight_U = best_weight_U
                    self.weight_W = best_weight_W
                    # start_time is reset every iteration, so this reports the
                    # duration of the last iteration only.
                    print("use time: ", time.time() - start_time)
                    print("The best result get at %d iteration with loss: %f" % (best_iter, loss_best))
                    return "Done!"
            else:
                optimize_counter = 0

            weight_W, weight_U = new_weight_W, new_weight_U
            loss_before = loss_now
            it += 1

        logging.info("============iterator : %s end ==========" % it)
        print("")

        print("The best result get at %d iteration with loss: %f" % (best_iter, loss_best))
        print("Done!")
        self.weight_W = best_weight_W
        self.weight_U = best_weight_U

    def predict_proba(self, X):
        """Return ``mlr_total(weight_W, weight_U, X)`` for X, padded with the
        intercept column when ``self.intercept`` is set.

        Requires a prior ``fit`` call, which sets ``weight_W`` / ``weight_U``.
        """
        return mlr_total(self.weight_W, self.weight_U, self._add_intercept(X))

    def predict(self, X):
        """Return an int array: 1 where ``predict_proba(X)`` exceeds 0.5, else 0."""
        return np.array(self.predict_proba(X) > 0.5, dtype = int)



In [3]:
# Smoke test: two Gaussian clusters of 100 samples x 10 features each,
# centred at +1 and -1 with standard deviation 0.2.
X1 = np.random.normal(1, 0.2, (100, 10))
X2 = np.random.normal(-1, 0.2, (100, 10))
X = np.vstack((X1, X2))
# The first 100 rows (the cluster centred at +1) get label 1; the rest stay 0.
y = np.zeros(200)
y[:100] = 1
ls = LSPLM(iterNum=1000, lamb=2, beta=2, pieceNum=2)
ls.fit(X, y)
y_test = ls.predict_proba(X)

(104.71015028827763, 1.6745184058234668, 6.2767485023490419)
iter:1, loss:112.661417196
(103.85883582030512, 1.6693837853493874, 6.2527485033319969)
iter:2, loss:111.780968109
(103.01314419365296, 1.6643486987870173, 6.2287485741955537)
iter:3, loss:110.906241467
(102.17306009406543, 1.6594140446424424, 6.2047486733852955)
iter:4, loss:110.037222812
(101.33856853405024, 1.6545807179862935, 6.1807487809966775)
iter:5, loss:109.173898033
(100.50985043313024, 1.6500465514714082, 6.1575971210021745)
iter:6, loss:108.317494106
(99.686927555613181, 1.6458863956381524, 6.1355972136257222)
iter:7, loss:107.468411165
(98.869505583904157, 1.6418350793790726, 6.1135973057702797)
iter:8, loss:106.624937969
(98.05756930311648, 1.637893410498686, 6.0915973997223709)
iter:9, loss:105.787060113
(97.251103223525732, 1.634062182481774, 6.0695974960318955)
iter:10, loss:104.954762902
(96.450091602139182, 1.6303421739099664, 6.0475975948228289)
iter:11, loss:104.128031371
(95.67413333121965, 1.62672608795

array([ 0.96065779,  0.96503679,  0.96699309,  0.95613442,  0.96193205,
        0.95871504,  0.97732789,  0.97076397,  0.96618551,  0.96576551,
        0.94877668,  0.96942812,  0.96610376,  0.96545199,  0.95890673,
        0.96313372,  0.95998193,  0.96659788,  0.96955235,  0.95536704,
        0.9741793 ,  0.97218439,  0.96954483,  0.96513589,  0.96765354,
        0.96653459,  0.97802478,  0.96509028,  0.95627835,  0.96419756,
        0.96735744,  0.96951622,  0.96230106,  0.96809348,  0.9448604 ,
        0.96533035,  0.96304899,  0.97207769,  0.96679831,  0.9697054 ,
        0.95318709,  0.96749771,  0.96555437,  0.97614298,  0.97301909,
        0.96516836,  0.96826888,  0.96997545,  0.96849338,  0.96146869,
        0.96884667,  0.97360166,  0.97028115,  0.96461362,  0.97348692,
        0.96965474,  0.95248886,  0.96447479,  0.96767502,  0.96601398,
        0.96289302,  0.95965211,  0.97073016,  0.96412835,  0.97264826,
        0.95731837,  0.97177864,  0.96784112,  0.97354816,  0.97

In [None]:
# Scratch array: two identical rows [1, 2, 3].
z = np.array([[1, 2, 3]] * 2)

In [None]:
# Writes z's contents back onto itself through a full slice; the values do not change.
z[:] = z[:]

In [None]:
# Bare expression: shows the current value of z as the cell output.
z