In [3]:
from math import exp

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import roc_auc_score
%matplotlib inline

In [2]:
# The CSV is read with header=None, so pandas labels the columns 0, 1, 2.
data = pd.read_csv(filepath_or_buffer='./data-logistic.csv', header=None)
# Preview the first five rows as the cell output.
data.head(n=5)

Unnamed: 0,0,1,2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [13]:
# Columns 1 and 2 become the feature matrix; column 0 becomes the target vector.
X, y = np.array(data[[1, 2]]), np.array(data[0])

In [15]:
def gradient_step(X, y, w, k = 0.1):
    """Take one gradient step for (unregularised) logistic regression.

    Every weight ``w_j`` is updated as::

        w_j + k / n * sum_i y_i * X[i, j] * (1 - 1 / (1 + exp(-y_i * <w, x_i>)))

    where ``n = len(y)``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix.
    y : array-like, shape (n_samples,)
        Target values (the notebook's data uses -1 / +1).
    w : array-like, shape (n_features,)
        Current weights. Not modified.
    k : float, optional
        Step size.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_features,) holding the updated weights.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)

    # Evaluated first so that empty input fails with a clear ZeroDivisionError.
    step = k / len(y)

    margins = y * np.dot(X, w)
    # 1 - 1 / (1 + exp(-m)) == 1 / (1 + exp(m)) == exp(-log(1 + exp(m))).
    # np.logaddexp(0, m) evaluates log(1 + exp(m)) without overflowing, so a
    # badly misclassified sample (very negative m) no longer raises
    # OverflowError the way a direct exp(-m) does.
    residual = np.exp(-np.logaddexp(0.0, margins))

    return w + step * np.dot(X.T, y * residual)

In [19]:
def gradient_step_reg(X, y, w, k = 0.1, C = 10):
    """Take one gradient step with an additional weight-shrinkage term.

    The result of ``gradient_step(X, y, w, k)`` is computed first, then
    ``k * C * w[j]`` is subtracted from each component ``j`` of it.

    Parameters
    ----------
    X, y, w, k
        Forwarded unchanged to ``gradient_step``.
    C : float, optional
        Regularisation coefficient scaling the shrinkage term.

    Returns
    -------
    The array returned by ``gradient_step``, with the shrinkage applied
    in place.
    """
    w_new = gradient_step(X, y, w, k)
    shrink = k * C
    # Shrink every returned component rather than only indices 0 and 1, so the
    # penalty stays complete if the weight vector has more than two entries.
    for j in range(len(w_new)):
        w_new[j] -= shrink * w[j]
    return w_new

In [20]:
def gradient_descent(X, y, w_init, k = 0.1, reg = False,
                     max_iter = 1e4, min_weight_dist = 1e-5, C = 10):
    """Iterate gradient steps until the weights stop moving.

    Starting from ``w_init``, repeatedly applies ``gradient_step`` (or
    ``gradient_step_reg`` when ``reg`` is true). The loop ends as soon as the
    Euclidean distance between two successive weight vectors is no longer
    greater than ``min_weight_dist``, or once ``max_iter`` iterations have run.

    Parameters
    ----------
    X, y
        Training data, forwarded to the step function.
    w_init
        Initial weights.
    k : float, optional
        Step size forwarded to the step function.
    reg : bool, optional
        Use the regularised step when true.
    max_iter : number, optional
        Upper bound on the number of iterations.
    min_weight_dist : float, optional
        Convergence threshold on the L2 distance between successive weights.
    C : float, optional
        Regularisation coefficient forwarded to ``gradient_step_reg``; unused
        when ``reg`` is false. The default of 10 equals that function's own
        default, so existing calls behave as before.

    Returns
    -------
    The last weight vector computed (``w_init`` itself if no iteration ran).
    """
    w = w_init
    weight_dist = np.inf
    iter_num = 0
    while weight_dist > min_weight_dist and iter_num < max_iter:
        if reg:
            w_new = gradient_step_reg(X, y, w, k, C)
        else:
            w_new = gradient_step(X, y, w, k)
        # Convergence is measured by how far this step moved the weights.
        weight_dist = np.linalg.norm(w - w_new, ord = 2)
        w = w_new
        iter_num += 1
    return w

In [21]:
# Both models start from the zero weight vector.
w_init = np.array((0, 0))
# Unregularised fit.
w_us = gradient_descent(X=X, y=y, w_init=w_init, reg=False)
# Regularised fit (step function's default regularisation coefficient).
w_reg = gradient_descent(X=X, y=y, w_init=w_init, reg=True)

In [22]:
def sigma(x, w):
    """Return the logistic sigmoid of the dot product of ``w`` and ``x``.

    Computes ``1 / (1 + exp(-z))`` with ``z = sum_j w[j] * x[j]``.

    Parameters
    ----------
    x : sequence of numbers
        Feature values of one sample.
    w : sequence of numbers
        Weights. Components are paired with ``zip``, so if the two sequences
        differ in length the extra trailing entries are ignored.

    Returns
    -------
    float
        A value in the closed interval [0, 1].
    """
    z = sum(w_j * x_j for w_j, x_j in zip(w, x))
    if z >= 0:
        return 1 / (1 + exp(-z))
    # For negative z, exp(-z) can exceed the float range and raise
    # OverflowError. The algebraically equal form exp(z) / (1 + exp(z)) only
    # ever exponentiates a negative number, so it cannot overflow.
    e = exp(z)
    return e / (1 + e)

In [23]:
# roc_auc_score is imported in the notebook's top import cell.
# Score every sample with each fitted weight vector via sigma().
y_scores_us = [sigma(row, w_us) for row in X]
y_scores_reg = [sigma(row, w_reg) for row in X]
# Compare the two models by ROC AUC against the true labels.
auc_us = roc_auc_score(y, y_scores_us)
auc_reg = roc_auc_score(y, y_scores_reg)
auc_us, auc_reg

(0.92685714285714282, 0.93628571428571417)