In [1]:
import numpy as np
import pandas as pd
import math
from sklearn import *

In [40]:
# Load the dataset; header=None because the CSV has no header row,
# so pandas labels the columns with integers starting at 0.
data = pd.read_csv(
    'data-logistic.csv',
    header=None,
    engine='python',
)

In [41]:
# Column 0 is used as the target; every other column is a feature.
y = data[0]
X = np.array(data.drop(columns=[0]))

h = 0.1           # gradient step size
C = 10            # L2 regularization coefficient
max_iter = 10000  # hard cap on the number of gradient steps
tol = 1e-5        # stop once consecutive weight vectors are closer than this

# Start from zero weights, one per feature column (float dtype, no hard-coded size).
w0 = np.zeros(X.shape[1])

In [43]:
def grad_descent_step(X, y, w_prev, h, C, reg):
    """Perform one gradient step of logistic regression with an optional L2 penalty.

    The update is::

        w = w_prev + h/n * sum_i y_i * x_i * (1 - 1/(1 + exp(-y_i * <w_prev, x_i>)))

    and, when ``reg`` is truthy, additionally ``- h * C * w_prev``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,)
        Targets. Positional order is used, so a pandas Series with any index works.
        NOTE(review): the formula presumes labels in {-1, +1} -- confirm for the data.
    w_prev : array-like of shape (n_features,)
        Current weight vector; it is not modified.
    h : float
        Step size.
    C : float
        L2 regularization coefficient (only used when ``reg`` is truthy).
    reg : bool
        Whether to apply the L2 penalty term.

    Returns
    -------
    numpy.ndarray of shape (n_features,)
        The updated weight vector.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w_prev = np.asarray(w_prev)
    n_samples = len(y)

    if n_samples > 0:
        margins = y * (X @ w_prev)
        # 1 - 1/(1 + exp(-m)) == 1/(1 + exp(m)) == exp(-log(1 + exp(m))).
        # logaddexp evaluates log(1 + exp(m)) without overflowing for large |m|,
        # whereas a direct exp(-m) overflows on badly misclassified points.
        sample_weights = np.exp(-np.logaddexp(0.0, margins))
        grad_step = (h / n_samples) * (X.T @ (y * sample_weights))
    else:
        # No samples: the data term is empty, only the penalty (if any) applies.
        grad_step = 0

    if reg:
        grad_step = grad_step - h * C * w_prev

    return w_prev + grad_step

In [48]:
# Sanity check of the feature matrix dimensions: (rows, columns).
np.shape(X)

(205, 2)

In [46]:
def grad_descent(X, y, w0, h, C, max_iter, tol, reg=True):
    """Run full-batch gradient descent until the weights stop moving.

    Repeatedly applies ``grad_descent_step`` starting from ``w0`` and stops when
    the Euclidean distance between consecutive weight vectors is at most ``tol``
    or after ``max_iter`` steps, whichever comes first. The iteration number and
    the current distance are printed after every step.

    Parameters
    ----------
    X, y : passed through unchanged to ``grad_descent_step``.
    w0 : array-like
        Initial weight vector.
    h : float
        Step size.
    C : float
        L2 regularization coefficient.
    max_iter : int
        Maximum number of steps.
    tol : float
        Convergence threshold on the distance between consecutive weights.
    reg : bool, default True
        Whether each step applies the L2 penalty. The default keeps the
        previous always-regularized behaviour; pass ``False`` for a plain fit.

    Returns
    -------
    The weight vector after the last step (``w0`` itself if no step was taken).
    """
    w = w0
    weight_dist = np.inf  # distance between consecutive weight vectors
    iter_num = 0          # number of steps taken so far

    while weight_dist > tol and iter_num < max_iter:
        w_prev = w
        w = grad_descent_step(X, y, w_prev, h, C, reg)

        weight_dist = np.linalg.norm(w - w_prev)
        iter_num += 1

        print(iter_num, weight_dist)

    return w

In [45]:
# Unregularized fit: passing C=0 zeroes the L2 penalty term (h * C * w) in every
# step. Without this the call is identical to the w_reg fit below and both
# reported AUC scores come out the same on a fresh run.
w = grad_descent(X, y, w0, h, 0, max_iter, tol)

1 0.048273938453907254
2 0.03507940465569505
3 0.026903330054023948
4 0.02159360872862849
5 0.017858691411466173
6 0.01506851961866618
7 0.012895969104778535
8 0.011155147129940408
9 0.009731656226247186
10 0.008550374904120235
11 0.007559334359506352
12 0.006721042005179072
13 0.006007509756799564
14 0.005397248370116867
15 0.004873365747228713
16 0.004422316571942783
17 0.0040330533519663805
18 0.003696434570889087
19 0.0034048031718707217
20 0.0031516811277175105
21 0.0029315448783532507
22 0.0027396579129037356
23 0.002571943984327265
24 0.0024248891848171893
25 0.0022954644035261492
26 0.002181062099110165
27 0.0020794431447244844
28 0.0019886908910890792
29 0.0019071706225770747
30 0.0018334933066002789
31 0.0017664830105671003
32 0.0017051476393595939
33 0.0016486527854421376
34 0.0015962985333228246
35 0.0015474990600396912
36 0.0015017648520587786
37 0.001458687334276237
38 0.0014179256881811436
39 0.0013791956273099392
40 0.0013422598990377153
41 0.001306920290843463
42 0.001

In [47]:
# Regularized fit using the configured L2 coefficient C.
w_reg = grad_descent(
    X,
    y,
    w0=w0,
    h=h,
    C=C,
    max_iter=max_iter,
    tol=tol,
)

1 0.048273938453907254
2 0.01325878033269942
3 0.003490189757421508
4 0.0009357171435694727
5 0.00024974376394042814
6 6.673864270317637e-05
7 1.7828658929372044e-05
8 4.76318913957636e-06


In [57]:
def sigmoid(X, w, y=None):
    """Return the logistic function of the linear scores ``X @ w`` for every row.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    w : array-like of shape (n_features,)
        Weight vector.
    y : ignored
        Unused; kept (now optional) so existing ``sigmoid(X, w, y)`` calls work.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        ``1 / (1 + exp(-<w, x_i>))`` for each row ``x_i``.
    """
    scores = np.asarray(X, dtype=float) @ np.asarray(w, dtype=float)
    # 1/(1 + exp(-s)) == exp(-log(1 + exp(-s))); logaddexp keeps this finite
    # for large |s| where a direct exp(-s) would overflow.
    return np.exp(-np.logaddexp(0.0, -scores))

In [58]:
# Predicted probabilities for the training rows under the weights w.
sigmoid(X=X, w=w, y=y)

array([0.44922586, 0.69020649, 0.42798423, 0.6983426 , 0.61440485,
       0.58839854, 0.70203837, 0.52888936, 0.41971211, 0.59584612,
       0.4997746 , 0.49906568, 0.47920412, 0.59768339, 0.64958906,
       0.5687875 , 0.47086925, 0.62138228, 0.66983526, 0.74475216,
       0.54577233, 0.01543129, 0.41544251, 0.44921358, 0.61272547,
       0.53710786, 0.69773006, 0.75864364, 0.47126841, 0.58722554,
       0.46196938, 0.48523066, 0.78569546, 0.7499381 , 0.51786719,
       0.660825  , 0.70493808, 0.66732057, 0.58362091, 0.69606071,
       0.64785173, 0.44015592, 0.59468979, 0.46013921, 0.53033774,
       0.64207864, 0.8042442 , 0.47228757, 0.55307849, 0.55490638,
       0.65095744, 0.58562266, 0.57476233, 0.70849238, 0.43468645,
       0.4709801 , 0.50374559, 0.70893964, 0.57090302, 0.70239483,
       0.6692237 , 0.65309035, 0.6969174 , 0.7260847 , 0.53189639,
       0.64652243, 0.53656743, 0.00550396, 0.56367596, 0.66559481,
       0.64259021, 0.51360061, 0.75324991, 0.45684737, 0.43295

In [62]:
# ROC AUC on the training data for the fit stored in w.
metrics.roc_auc_score(y_true=y, y_score=sigmoid(X, w, y))

0.9268571428571428

In [64]:
# ROC AUC for both weight vectors, rounded to three decimals.
auc = round(metrics.roc_auc_score(y, sigmoid(X, w, y)), 3)
auc_reg = round(metrics.roc_auc_score(y, sigmoid(X, w_reg, y)), 3)

# The context manager closes the file even if the write fails.
with open('answer_3_1.txt', 'w') as file:
    file.write('{} {}'.format(auc, auc_reg))