In [221]:
import pandas as pd
import itertools
from numpy import exp
import numpy as np
from scipy.spatial import distance

In [222]:
# Directory holding the input CSV.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
path_to_data = "C:/study/data/"

In [223]:
# header=None tells pandas the file has no header row, so columns get the integer labels 0, 1, 2.
df = pd.read_csv(path_to_data + "data-logistic.csv", header=None)

In [224]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,0,1,2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [225]:
# Column 0 is the target (the preview shows values -1 and 1); the remaining columns are the features.
y = df[df.columns[0]]
X = df[df.columns[1:]]
# Individual feature columns, selected by their integer column labels.
x1 = X[1]
x2 = X[2]

In [226]:
def sigmoid(x):
    """Logistic function: 1 / (1 + e**(-x)); works on scalars and NumPy arrays alike."""
    denominator = 1 + exp(-x)
    return 1 / denominator

In [252]:
def gradient_step(k, C, w1, w2, x1, x2, y):
    """
    Perform one weight update for two-feature logistic regression.

    Each weight moves by k times the average over samples of
    y_i * x_i * (1 - sigmoid(y_i * (w1*x1_i + w2*x2_i))), minus the
    shrinkage term k * C * w. Both weights are updated from the same
    incoming (w1, w2).

    k - step size, C - regularization coefficient,
    w1, w2 - current weights
    x1, x2 - feature values, one entry per sample
    y - target vector, same length as x1 and x2

    Returns the updated (w1, w2) tuple.
    Raises ValueError if x1, x2 and y differ in length.
    """
    # Work on plain arrays so samples are matched by position: indexing a
    # pandas Series with y[i] is label-based and breaks on a non-default index.
    x1 = np.asarray(x1, dtype=float)
    x2 = np.asarray(x2, dtype=float)
    y = np.asarray(y, dtype=float)

    n = len(y)
    if not (len(x1) == len(x2) == n):
        raise ValueError("x1, x2 and y must have the same length")
    if n == 0:
        # No samples: only the regularization shrinkage applies.
        return w1 - k*C*w1, w2 - k*C*w2

    # Per-sample factor shared by both weight updates, computed once.
    margin = y * (w1*x1 + w2*x2)
    pull = y * (1 - sigmoid(margin))

    new_w1 = w1 + (k/n) * np.dot(x1, pull) - k*C*w1
    new_w2 = w2 + (k/n) * np.dot(x2, pull) - k*C*w2
    return new_w1, new_w2



In [253]:
def gradient_descent(k, C,  w1, w2, x1, x2, y, iterations=1e4, stop=1e-5):
    """
    Repeat gradient_step until the weights stop moving or the step budget runs out.

    k - step size, C - regularization coefficient,
    w1, w2 - initial weights
    x1, x2 - feature values, y - target vector
    iterations - maximum number of gradient steps to take
    stop - convergence threshold: iteration ends once the Euclidean distance
           between consecutive (w1, w2) pairs drops below this value

    Returns the final (w1, w2) tuple; with iterations <= 0 the initial
    weights are returned unchanged.
    """
    # Convert once, outside the loop, so every step indexes samples by
    # position rather than by pandas index label.
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    y = np.asarray(y)

    steps_taken = 0
    # Checking the budget before stepping caps the run at exactly
    # `iterations` steps (the post-step `i > iterations` test allowed one extra).
    while steps_taken < iterations:
        prev_w1, prev_w2 = w1, w2
        w1, w2 = gradient_step(k=k, C=C, w1=prev_w1, w2=prev_w2, x1=x1, x2=x2, y=y)
        steps_taken += 1
        if distance.euclidean((w1, w2), (prev_w1, prev_w2)) < stop:
            break
    return (w1, w2)
    

In [254]:
def get_probs(w1,w2,x1,x2):
    """
    Return sigmoid(w1*x1 + w2*x2) for every sample, as a list.

    w1, w2 - weights
    x1, x2 - feature values, one entry per sample (same length)

    Raises ValueError if x1 and x2 differ in length.
    """
    # Positional arrays: x1[i] on a pandas Series is a label lookup and
    # fails (or picks the wrong row) when the index is not 0..n-1.
    x1 = np.asarray(x1, dtype=float)
    x2 = np.asarray(x2, dtype=float)
    if len(x1) != len(x2):
        raise ValueError("x1 and x2 must have the same length")
    return list(sigmoid(w1*x1 + w2*x2))

In [255]:
# Fit with regularization coefficient C=10 and step size 0.1, starting from zero weights.
w1,w2 = gradient_descent(k=0.1, C=10, w1=0, w2=0, x1=x1, x2=x2, y=y)

In [256]:
# Show the fitted weights.
w1, w2

(0.02855875454623422, 0.024780137249735528)

In [232]:
# Sigmoid of the fitted linear score for every row, computed on the same data used for fitting.
probs = get_probs(w1,w2,x1,x2)

In [233]:
# NOTE(review): this displays the entire list; consider `probs[:5]` to keep the output short.
probs

[0.4944025475025294,
 0.5294960008513981,
 0.4957368033419395,
 0.5282277128147582,
 0.520966409819608,
 0.5062604414439291,
 0.5300823367598685,
 0.508618029218254,
 0.4887377640717866,
 0.5175760055944163,
 0.5019164771626575,
 0.5015494554061638,
 0.4934116084179819,
 0.5055899757833094,
 0.5222836185848817,
 0.5043425922116569,
 0.49295960028999414,
 0.5191125020027608,
 0.5219557272753146,
 0.5348921372802882,
 0.5021801475793493,
 0.381319645530652,
 0.48731919498878196,
 0.4920696728364942,
 0.5120357324177529,
 0.5039500221633949,
 0.5306912927485266,
 0.5410776318394168,
 0.4983937869387338,
 0.5135725993580922,
 0.49451210360457637,
 0.5024753364374631,
 0.546004822044189,
 0.5326205306762868,
 0.5073897010518211,
 0.5137854596695193,
 0.5341760293556234,
 0.5230097578402351,
 0.507203224404265,
 0.5238303526560476,
 0.5182801518655145,
 0.49704680889686764,
 0.5127011195651381,
 0.49238437577399274,
 0.5070329898537863,
 0.5197279682757737,
 0.5438830405783189,
 0.4972685428

In [234]:
from sklearn.metrics import roc_auc_score

#### With L2 regularization (C = 10)

In [235]:
# ROC AUC of the predicted scores against the true labels, rounded to three decimals.
round(roc_auc_score(y, probs), 3)

0.936

#### Without regularization (C = 0)

In [236]:
# Same fit with C=0, so the k*C*w shrinkage term in each step is zero.
w1,w2 = gradient_descent(k=0.1, C=0, w1=0, w2=0, x1=x1, x2=x2, y=y)

In [237]:
# Show the weights fitted with C=0.
w1,w2

(0.2878116204717764, 0.09198330215925439)

In [238]:
# Recompute the per-row sigmoid scores with the C=0 weights (overwrites the earlier `probs`).
probs = get_probs(w1,w2,x1,x2)

In [239]:
# ROC AUC for the C=0 fit, rounded to three decimals.
round(roc_auc_score(y, probs), 3)

0.927

In [244]:
# Rows as (label, feature 1, feature 2) tuples, built by iterating the three columns in parallel.
res1 = list(zip(y, x1, x2))


In [245]:
# Rows as (label, feature 1, feature 2) tuples, looked up by index 0..len(y)-1 in each column.
res2 = [(y[row], x1[row], x2[row]) for row in range(0, len(y))]

[(-1, -0.6638265368249999, -0.138525716794),
 (1, 1.9945955012799998, 2.46802467614),
 (-1, -1.24739491636, 0.749424636303),
 (1, 2.30937424866, 1.89983555937),
 (1, 0.849143314149, 2.40774982339),
 (1, 1.4542709504, -0.6654157090030001),
 (1, 2.25422743269, 2.2637858535),
 (-1, -0.06757951660559999, 1.46914109556),
 (-1, -0.861960913075, -0.824855578346),
 (1, 0.699178929108, 2.03248760563),
 (-1, -0.16147996103599999, 0.495461854788),
 (-1, -0.14710226136600002, 0.41964653929300005),
 (-1, 0.0802959559549, -1.1560970761),
 (-1, 1.72065543353, -1.08066154412),
 (1, 1.57395764659, 1.7854335648),
 (-1, 1.16844517782, -0.645619241065),
 (-1, -0.06662736015, -1.05974658502),
 (1, 1.1633547323, 1.7458867561099998),
 (1, 2.0969697873400004, 1.1296376586599999),
 (1, 3.03570513408, 2.14283779581),
 (-1, 0.831852058628, -0.606776832016),
 (1, -12.977800215, -4.57305579231),
 (-1, -0.842603085438, -1.07628156079),
 (-1, -0.47353672342299996, -0.734473330818),
 (-1, 1.5403731257100002, 0.167920