In [19]:
import pandas as pd
from sklearn.metrics import roc_auc_score
import numpy as np
import math
from functools import reduce

In [3]:
data = pd.read_csv('data-logistic.csv', header=None)

In [4]:
data.head()

Unnamed: 0,0,1,2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [18]:
Y = data[0]
X = [data[1], data[2]]

In [40]:
inv_sigma = lambda x, w, y: 1 + np.exp(-y*(w[0]*x[0]+w[1]*x[1]))

In [48]:
def sugma1_sum(w, y, x):
    res = 0
    for i in range(len(y)):
        res += x[0][i] * y[i] * (1 - 1 / (inv_sigma([x[0][i], x[1][i]], w, y[i])) )
    return res
    
def sugma2_sum(w, y, x):
    res = 0
    for i in range(len(y)):
        res += x[1][i] * y[i] * (1 - 1 / (inv_sigma([x[0][i], x[1][i]], w, y[i])) )
    return res

In [67]:
def weight_step(w, y, x, k=0.1, c=10):
    new_w = []  
    new_w.append(w[0] + (k/len(y)) * sugma1_sum(w, y, x) - k * c * w[0])
    new_w.append(w[1] + (k/len(y)) * sugma2_sum(w, y, x) - k * c * w[1])
    return new_w

In [59]:
w = weight_step([0, 0], Y, X, 0.1, 10)
for _ in range(10):
    w = weight_step(w, Y, X, 0.1, 10)

weight: [0, 0]
weight: [0.03573126639874109, 0.03245997127838869]
weight: [0.026699107895997144, 0.022753511125887887]
weight: [0.029057389054538967, 0.02532643446716265]
weight: [0.028426566376644737, 0.02463532715145532]
weight: [0.02859493642710941, 0.024819781681715954]
weight: [0.028549946772457632, 0.024770486880730656]
weight: [0.02856196551701265, 0.024783655436404754]
weight: [0.028558754546234223, 0.024780137249735552]
weight: [0.028559612385338135, 0.024781077162517304]
weight: [0.02855938320494622, 0.024780826055178465]


In [75]:
def log_sum(w, y, x, func):
    res = 0
    for i in range(len(y)):
        res += func(i)
    return res

In [81]:
def main_func(w, y, x, num, k=0.1, c=10):
    if num == 0:
        return
    log_sigm = lambda i: math.log(inv_sigma([x[0][i], x[1][i]], w, y[i]))
    res = log_sum(w, y, x, log_sigm)/len(y) + 1/2 * c * math.sqrt(w[0]*w[1])
    w_new = weight_step(w, y, x, k, c)
    if (abs(w_new[0] - w[0]) < 1e-5) and (abs(w_new[1] - w[1]) < 1e-5):
        print('res: {}'.format(str(w)))
        return
    print('weight: {}, main: {}'.format(str(w), str(res)))
    main_func(w_new, y, x, num-1, k, c)

main_func([0, 0], Y, X, 10000, 0.1, 0)

weight: [0, 0], main: 0.6931471805599465
weight: [0.03573126639874109, 0.03245997127838869], main: 0.673091462221579
weight: [0.06243037429473824, 0.05521348240427658], main: 0.662263034329036
weight: [0.08343427766831138, 0.07202494311027564], main: 0.6557574322623992
weight: [0.10067272600016682, 0.08502955141022268], main: 0.6515057709750507
weight: [0.11522226961113709, 0.09538540191331751], main: 0.6485694566424615
weight: [0.1277368954185646, 0.10377851959378628], main: 0.6464646283203775
weight: [0.13864802866699205, 0.11065261519877048], main: 0.6449150469025037
weight: [0.14825909445507238, 0.11631518722107709], main: 0.6437508526583247
weight: [0.15679400747483294, 0.12099069710246728], main: 0.6428618117968218
weight: [0.1644241120056825, 0.12484950726225981], main: 0.6421734730290091
weight: [0.1712842172222513, 0.12802480132366872], main: 0.6416340092367986
weight: [0.1774827070945897, 0.13062309479013856], main: 0.6412064875321885
weight: [0.18310822687402223, 0.132731103

In [82]:
main_func([0, 0], Y, X, 10000, 0.1, 10)

weight: [0, 0], main: 0.6931471805599465
weight: [0.03573126639874109, 0.03245997127838869], main: 0.8433732787085128
weight: [0.026699107895997144, 0.022753511125887887], main: 0.8011783602291046
weight: [0.029057389054538967, 0.02532643446716265], main: 0.8122596525448457
weight: [0.028426566376644737, 0.02463532715145532], main: 0.809286863722742
weight: [0.02859493642710941, 0.024819781681715954], main: 0.8100801823468186
weight: [0.028549946772457632, 0.024770486880730656], main: 0.809868175893284
res: [0.02856196551701265, 0.024783655436404754]


In [89]:
without_L2 = []
for i in range(len(Y)):
    without_L2.append(1/inv_sigma([X[0][i], X[1][i]], [0.28769331904259177, 0.09209270103474641], 1))
round(roc_auc_score(Y, without_L2), 3)

0.927

In [90]:
with_L2 = []
for i in range(len(Y)):
    with_L2.append(1/inv_sigma([X[0][i], X[1][i]], [0.02856196551701265, 0.024783655436404754], 1))
round(roc_auc_score(Y, with_L2), 3)

0.936