In [53]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression as LR
# Seed NumPy's global RNG so the np.random.sample draws in the cells below
# produce the same data when the notebook is run top to bottom.
np.random.seed(42)

In [55]:
def score(X, W):
    """Return the linear score of ``X`` under weights ``W``.

    This is a thin wrapper around ``np.dot(X, W)``; the result has whatever
    shape ``np.dot`` produces for the two operands.
    """
    return np.dot(X, W)

In [56]:
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The naive formula overflows in ``np.exp(-z)`` for large negative ``z``
    (emitting a RuntimeWarning). Here the exponential is only ever taken of a
    non-positive number, so it stays in ``(0, 1]``.

    Parameters
    ----------
    z : scalar or array-like
        Input score(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray of the same shape as
    ``z`` with values in ``[0, 1]``.
    """
    z = np.asarray(z)
    # exp(-|z|) is at most 1, so it cannot overflow for any finite z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    a = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays as they are.
    return a[()]

In [57]:
def sigmoid_derive(a):
    """Return ``a * (1 - a)``.

    ``a`` is expected to already be a sigmoid output, in which case this is
    the derivative of the sigmoid with respect to its input.
    """
    return a * (1.0 - a)

In [58]:
def f_log_loss(y_true, p, eps=1e-15):
    """Mean binary cross-entropy between labels ``y_true`` and probabilities ``p``.

    Parameters
    ----------
    y_true : array-like
        Target values (0/1).
    p : array-like
        Predicted probabilities, broadcastable against ``y_true``.
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before taking logs.
        Without this, ``p == 0`` or ``p == 1`` yields ``log(0) = -inf`` and the
        product ``0 * -inf`` turns the whole mean into ``nan``.

    Returns
    -------
    The mean of ``-y*log(p) - (1-y)*log(1-p)`` over all elements.
    """
    p = np.clip(p, eps, 1.0 - eps)
    return np.mean(-y_true * np.log(p) - (1.0 - y_true) * np.log(1.0 - p))

In [103]:
n_features = 2
x_size = 10000
# Random design matrix: one row per sample, with one extra leading column
# in addition to the n_features feature columns.
X = np.random.sample((x_size, 1 + n_features))
# Overwrite column 0 with a constant so the first weight acts as the bias term.
X[:, 0] = 1
X

array([[ 1.        ,  0.44965709,  0.02625628],
       [ 1.        ,  0.04964825,  0.55845583],
       [ 1.        ,  0.45288916,  0.11473355],
       ..., 
       [ 1.        ,  0.68568967,  0.73464453],
       [ 1.        ,  0.28394877,  0.50914923],
       [ 1.        ,  0.61873581,  0.22479946]])

In [104]:
# Ground-truth weights as a 3x1 column vector; the first entry multiplies
# the constant column of X.
W_true = np.array([-0.4, 0.4, 0.5]).reshape(-1, 1)

In [105]:
# Labels: the model probability sigmoid(X . W_true) rounded to 0/1 with np.round,
# computed from X as it stands at this point (the noise cell comes afterwards).
y_true = np.round(sigmoid(score(X, W_true)))
# Mean of the 0/1 labels, i.e. the share of positives.
y_true.mean()

0.59619999999999995

In [106]:
# Add noise: shift every feature column (column 0, the constant, is left alone)
# by a fresh np.random.sample draw divided by 1.5. The draw has the full shape of X;
# its first column is simply discarded.
# NOTE(review): X is modified in place, so re-running this cell adds noise again
# on top of the previous result.
X[:, 1:] += np.random.sample(X.shape)[:, 1:] / 1.5
X

array([[ 1.        ,  0.67705109,  0.65383655],
       [ 1.        ,  0.67441357,  0.77546924],
       [ 1.        ,  0.90589385,  0.35441067],
       ..., 
       [ 1.        ,  1.14198931,  0.99340631],
       [ 1.        ,  0.58902349,  0.85442808],
       [ 1.        ,  1.10888032,  0.22627589]])

#### Newton's method

In [119]:
# Starting point for the optimizer: a 3x1 column, the listed values divided by 10.
W_init = np.array([-0.25, 0.95, -0.45]).reshape(-1, 1) / 10
# Show the starting weights next to the ground-truth weights.
print(W_init, "\n\n", W_true)

[[-0.025]
 [ 0.095]
 [-0.045]] 

 [[-0.4]
 [ 0.4]
 [ 0.5]]


In [None]:
# Damped Newton iteration for logistic regression.
#
# Each pass computes the predicted probabilities for the current W, builds the
# Hessian X^T diag(p * (1 - p)) X and the gradient X^T (p - y_true), and moves W
# by alpha times the solution of (H + lam * I) step = grad. Progress is printed
# roughly ten times over the run, and the loop stops early once no weight moves
# by more than 1e-8 between iterations.
lam = 200  # added to the Hessian diagonal before solving
alpha = 0.003  # step size applied to the Newton direction
iter1 = 100  # maximum number of iterations
# Size W_prev from W_init: W itself does not exist yet on a fresh kernel.
W_prev, W = np.zeros(W_init.shape), np.copy(W_init)
for i in range(iter1):
    # probability using current W
    p = sigmoid(score(X, W))
    p_derive = sigmoid_derive(p)
    # Hessian: scaling the rows of X by p_derive is the same product as
    # X^T diag(p_derive) X, without allocating an x_size-by-x_size matrix.
    H = np.dot(X.T, X * p_derive)
    # gradient (3x1)
    grad = np.dot(X.T, p - y_true)
    # NOTE(review): lam appears only in the Hessian; the gradient carries no
    # lam * W term, so lam damps the step instead of acting as a full L2
    # penalty on the objective -- confirm this is intended.
    # Solve the linear system directly rather than forming an explicit inverse.
    step = np.linalg.solve(H + lam * np.eye(W.shape[0]), grad)
    # new W
    W -= alpha * step
    # Metrics use p from before the update, i.e. they describe the previous W.
    auc = roc_auc_score(y_true, p)
    gini = 200 * auc - 100
    log_loss = f_log_loss(y_true, p)
    # max(1, ...) keeps the modulus non-zero when iter1 < 10.
    if i % max(1, iter1 // 10) == 0:
        print('{}: gini = {}, log_loss = {}'.format(i, gini, log_loss))
        print(W)
        print('-' * 20)

    if np.max(np.abs(W - W_prev)) > 1e-8:
        W_prev = np.copy(W)
    else:
        print('The weight difference is small. Process stopped.')
        break

0: gini = 15.859158524045071, log_loss = 0.6894635536989396
[[-0.02916719]
 [ 0.09748021]
 [-0.04076173]]
--------------------
10: gini = 45.399042873314045, log_loss = 0.6826802700833441
[[-0.07065072]
 [ 0.12211186]
 [ 0.00127095]]
--------------------
20: gini = 62.00111852530114, log_loss = 0.6761312770221675
[[-0.11179831]
 [ 0.14644188]
 [ 0.04268418]]
--------------------
30: gini = 70.46284051926023, log_loss = 0.6698045480015583
[[-0.15261871]
 [ 0.17048158]
 [ 0.08350094]]
--------------------


In [114]:
# Baseline for comparison: fit scikit-learn's LogisticRegression (imported as LR)
# with its default settings on the same X / y_true.
lr = LR()
lr.fit(X, y_true.ravel())
# Second column of predict_proba -- presumably the probability of the positive class.
pred2 = lr.predict_proba(X)[:, 1]
# Same Gini metric as in the Newton loop: 200 * AUC - 100.
print(200.0 * roc_auc_score(y_true.ravel(), pred2.ravel()) - 100)
# NOTE(review): X already contains a constant column; if LR's defaults also fit an
# intercept, the bias is modelled twice -- confirm against the sklearn docs.
lr.coef_

81.4845681889


array([[-3.91208425,  4.40717924,  5.91972339]])