In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score

# Load the dataset and split it into a label column and a feature block.
# NOTE(review): read_csv treats the first line of the file as a header row by
# default; if 'data-logistic.csv' has no header, that row is dropped here and
# header=None would be needed -- confirm against the file.
data = pd.read_csv('data-logistic.csv').values
# First column -> labels as an (n, 1) column vector.
y = np.array(data[:, 0]).reshape(-1, 1)
# Remaining columns -> features; the reshape insists on exactly two per row.
x = np.array(data[:, 1:]).reshape(data.shape[0], 2)

In [2]:
def cost_function(x,y,w,c=0):
    """Return the mean logistic loss plus an L2 penalty on the weights.

    The value computed is::

        (1/len(y)) * sum(log(1 + exp(-y * (x @ w)))) + 0.5 * c * ||w||**2

    Parameters
    ----------
    x : array
        Feature matrix; must be compatible with ``np.dot(x, w)``.
    y : array
        Labels, multiplied element-wise with ``np.dot(x, w)``.
        NOTE(review): this loss form presumes labels in {-1, +1} and a shape
        matching ``np.dot(x, w)`` (e.g. a column vector) -- confirm with caller.
    w : array
        Weight vector.
    c : float, optional
        L2 regularisation coefficient (default 0, i.e. no penalty).

    Returns
    -------
    float
        The regularised cost.
    """
    margins = np.multiply(y, np.dot(x, w))
    # log(1 + exp(-m)) written as logaddexp(0, -m): same value, but it stays
    # finite when exp(-m) would overflow for strongly misclassified samples.
    losses = np.logaddexp(0, -margins)
    return (1/len(y))*np.sum(losses) + 0.5*c*(np.linalg.norm(w) ** 2)

def grad_step(w,lr=0.1,c=0,features=None,labels=None):
    """Perform one gradient-descent step for L2-regularised logistic regression.

    The update applied to every weight ``w_j`` is::

        w_j - lr*c*w_j + (lr/n) * sum_i( y_i * x_ij * (1 - 1/(1 + exp(-y_i * (x_i @ w)))) )

    Parameters
    ----------
    w : array
        Current weights; reshaped internally to a column vector.
    lr : float, optional
        Step size (default 0.1).
    c : float, optional
        L2 regularisation coefficient (default 0).
    features : array, optional
        Feature matrix. When omitted, the module-level ``x`` is used.
    labels : array, optional
        Label vector. When omitted, the module-level ``y`` is used.

    Returns
    -------
    numpy.ndarray
        Updated weights as a column vector with one row per feature.
    """
    # Fall back to the notebook-level data when no explicit data is supplied.
    if features is None:
        features = x
    if labels is None:
        labels = y

    w = np.asarray(w, dtype=float).reshape(-1, 1)
    features = np.asarray(features, dtype=float)
    labels = np.asarray(labels, dtype=float).reshape(-1, 1)

    # Shared term, computed once for all weights instead of once per weight.
    margins = labels * np.dot(features, w)
    residual = 1 - 1/(1 + np.exp(-margins))

    # features.T @ (labels * residual) performs the per-feature sums in one go,
    # so the step is no longer tied to exactly two features.
    data_term = np.dot(features.T, labels * residual)
    return w - lr*c*w + (lr/len(labels))*data_term

In [3]:
def fit(w,lr,c=0,eps=1e-5,max_iter=10000):
    """Run gradient descent on the module-level data until the cost stalls.

    Each iteration prints the iteration number, the current cost and the
    current weights, then takes one ``grad_step``. The loop ends when any of
    the following holds:

    * the current cost is not greater than ``eps``;
    * the cost rounded to 5 decimals is unchanged by the step;
    * ``max_iter`` iterations have been performed.

    The cost is evaluated with ``cost_function(x, y, w, c=c)`` on the
    module-level ``x`` and ``y``.

    Parameters
    ----------
    w : array
        Initial weights.
    lr : float
        Step size passed to ``grad_step``.
    c : float, optional
        L2 regularisation coefficient (default 0).
    eps : float, optional
        Cost threshold at or below which iteration stops (default 1e-5).
    max_iter : int, optional
        Upper bound on the number of iterations (default 10000).

    Returns
    -------
    array
        The weights after the last step taken.
    """
    cost = cost_function(x,y,w,c=c)
    for step in range(max_iter):
        # Format the weights element-wise: passing a 1-element array to %f
        # relies on array-to-scalar conversion that newer NumPy deprecates.
        weights_text = ' '.join('%f' % value for value in np.ravel(w))
        print('Iter: %d, cost: %f, w: %s' % (step, cost, weights_text))
        # `not cost > eps` (rather than `cost <= eps`) also stops on a NaN cost.
        if not cost > eps:
            break
        w = grad_step(w,lr=lr,c=c)
        new_cost = cost_function(x,y,w,c=c)
        if round(cost,5) == round(new_cost,5):
            break
        # Reuse the post-step cost next iteration instead of recomputing it.
        cost = new_cost
    return w

def get_prob(x,w):
    """Return the model's positive-class probability ``1 / (1 + exp(-(x @ w)))``.

    Parameters
    ----------
    x : array
        Feature matrix; must be compatible with ``np.dot(x, w)``.
    w : array
        Weight vector.

    Returns
    -------
    numpy.ndarray
        Sigmoid of ``np.dot(x, w)``, same shape as that product.
    """
    # The exponent must be negated: the loss log(1 + exp(-y * x@w)) used for
    # training corresponds to P(y=+1 | x) = 1 / (1 + exp(-x@w)). Without the
    # minus sign the function returns the probability of the negative class.
    return 1/(1+np.exp(-np.dot(x,w)))

In [4]:
# Two hard-coded scores, each rounded to three decimal places.
# NOTE(review): these look like previously obtained metric values pasted in by
# hand rather than computed in this notebook -- confirm their origin.
res1, res2 = (
    round(score, 3)
    for score in (0.9311428571428569, 0.9363809523809523)
)

In [7]:
# Unregularised run: fit from zero weights with c = 0, then score the fit.
# `c` is set to the value actually passed to fit(), so the variable left in the
# kernel matches the run that produced this cell's output.
w = np.zeros((2, 1))  # float zeros, matching the float weights fit() returns
lr, c = 0.1, 0
w = fit(w=w, lr=lr, c=c)
roc_auc_score(y, get_prob(x,w))

Iter: 0, cost: 0.693147, w: 0.000000 0.000000
Iter: 1, cost: 0.673091, w: 0.035731 0.032460
Iter: 2, cost: 0.662263, w: 0.062430 0.055213
Iter: 3, cost: 0.655757, w: 0.083434 0.072025
Iter: 4, cost: 0.651506, w: 0.100673 0.085030
Iter: 5, cost: 0.648569, w: 0.115222 0.095385
Iter: 6, cost: 0.646465, w: 0.127737 0.103779
Iter: 7, cost: 0.644915, w: 0.138648 0.110653
Iter: 8, cost: 0.643751, w: 0.148259 0.116315
Iter: 9, cost: 0.642862, w: 0.156794 0.120991
Iter: 10, cost: 0.642173, w: 0.164424 0.124850
Iter: 11, cost: 0.641634, w: 0.171284 0.128025
Iter: 12, cost: 0.641206, w: 0.177483 0.130623
Iter: 13, cost: 0.640864, w: 0.183108 0.132731
Iter: 14, cost: 0.640587, w: 0.188234 0.134420
Iter: 15, cost: 0.640361, w: 0.192923 0.135751
Iter: 16, cost: 0.640174, w: 0.197225 0.136773
Iter: 17, cost: 0.640018, w: 0.201187 0.137529
Iter: 18, cost: 0.639887, w: 0.204845 0.138056
Iter: 19, cost: 0.639775, w: 0.208234 0.138385
Iter: 20, cost: 0.639680, w: 0.211382 0.138544
Iter: 21, cost: 0.63959

0.5

In [6]:
# Regularised run: restart from zero weights and fit with c = 10,
# then score the fitted model on the same data.
lr = 0.1
c = 10
w = np.zeros(2, dtype=int).reshape(2, 1)
w = fit(w=w, lr=lr, c=c)
roc_auc_score(y, get_prob(x,w))

Iter: 0, cost: 0.693147, w: 0.000000 0.000000
Iter: 1, cost: 0.684743, w: 0.035731 0.032460
Iter: 2, cost: 0.684094, w: 0.026699 0.022754
Iter: 3, cost: 0.684049, w: 0.029057 0.025326


0.5