In [357]:
import pandas as pd
import numpy as np
from numpy import linalg
from math import inf, sqrt
import time
  

In [358]:
Data = pd.read_csv('data-logistic.csv', names=['target', 'firstFeature', 'secondFeature'])
Y = Data['target']
X = Data.drop('target', axis=1)
Data.head()

Unnamed: 0,target,firstFeature,secondFeature
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [367]:
def w1_function(w1, w2, k, C, X, Y):
    L = Y.size
    b = list(map(lambda x: 1.0 - (1.0/(1.0 + np.exp(-x, dtype=np.float128))), Y * (X['firstFeature']*w1 + X['secondFeature']*w2)))
    return w1 + (k*(1.0/L))*sum(Y*X['firstFeature']*b) - k*C*w1


def w2_function(w1, w2, k, C, X, Y):
    L = Y.size
    b = list(map(lambda x: 1.0 - (1.0/(1.0 + np.exp(-x, dtype=np.float128))), Y * (X['firstFeature']*w1 + X['secondFeature']*w2)))
    return w2 + (k*(1.0/L))*sum(Y*X['secondFeature']*b) - k*C*w2


In [368]:
# Градиентный спуск

def grad(k, X, Y, w1=0., w2=0., eps=1e-5, C=0.0, max_iterations=10000):
    w1_new = w1_function(w1, w2, k=k, C=C, X=X, Y=Y)
    w2_new = w2_function(w1, w2, k=k, C=C, X=X, Y=Y)
    i = 1
    while sqrt((w2_new - w2)**2 + (w1_new - w1)**2) > eps and i <= max_iterations:
        w1_temp = w1_new
        w2_temp = w2_new
        w1_new = w1_function(w1_new, w2_new, k=k, C=C, X=X, Y=Y)
        w2_new = w2_function(w1_new, w2_new, k=k, C=C, X=X, Y=Y)
        w1 = w1_temp
        w2 = w2_temp
        i += 1
    return [w1_new, w2_new] 

In [369]:
%%time
output_list_l2_reg = grad(k=0.1, C=10., X=X, Y=Y)
output_list_simple = grad(k=0.1, C=0., X=X, Y=Y)

CPU times: user 586 ms, sys: 13.5 ms, total: 599 ms
Wall time: 587 ms


In [370]:
output_list_l2_reg

[0.028559638503823442385, 0.024780841099558656517]

In [371]:
 output_list_simple

[0.28781531452416753215, 0.09197633380934426815]

In [372]:
from sklearn.metrics import roc_auc_score


def a(w1, w2, X):
    return 1./(1. + np.exp(- w1*X['firstFeature'] - w2*X['secondFeature'], dtype=np.float128))

In [373]:
y_res_l2 = a(output_list_l2_reg[0], output_list_l2_reg[1], X)
y_res_no_l2 = a(output_list_simple[0], output_list_simple[1], X)

In [374]:
auc = roc_auc_score(Y, y_res_no_l2)
rauc = roc_auc_score(Y, y_res_l2)

# Запись ответа
output1 = open('week3.2_qw.txt', 'w')
print(
    ' '.join(map(lambda x: str(round(x, 3)), [auc, rauc])),
    file=output1, 
    end=''
)
output1.close()