In [1]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean
from sklearn.metrics import roc_auc_score


In [2]:
# Load the dataset. header=None means the first line of the file is treated as
# data, so the columns get the integer labels 0, 1, 2.
df = pd.read_csv("data-logistic.csv", header=None)


In [3]:
# Preview the loaded frame (pandas abbreviates the middle rows of a long frame).
df


Unnamed: 0,0,1,2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.407750
...,...,...,...
200,1,4.245176,3.053931
201,1,2.437935,1.357804
202,-1,-1.876638,1.533398
203,1,-6.824446,-13.934211


In [4]:
def gradient_descent(
    X: np.ndarray, y: np.ndarray, k: float = 0.1, C: float = 10.0, max_iter: int = 10000
) -> np.ndarray:
    """Fit logistic-regression weights by batch gradient descent with an L2 penalty.

    Starting from the zero vector, every iteration applies the update

        w <- w + k * mean_i(y_i * x_i * (1 - 1 / (1 + exp(-y_i * <w, x_i>)))) - k * C * w

    to all weights simultaneously. Iteration stops early once the Euclidean
    distance between two consecutive weight vectors falls below 1e-5.

    Parameters
    ----------
    X : np.ndarray
        Feature matrix of shape (n_samples, n_features). Any number of
        feature columns is supported.
    y : np.ndarray
        Labels of shape (n_samples,). The update above is the gradient step of
        the logistic loss for labels encoded as -1 / +1.
    k : float
        Step size of each update.
    C : float
        L2 regularization coefficient; ``C=0`` disables the penalty term.
    max_iter : int
        Upper bound on the number of updates.

    Returns
    -------
    np.ndarray
        Weight vector of shape (n_features,), rounded to 3 decimal places.
        No intercept term is fitted.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (n_samples, n_features), got ndim={X.ndim}")

    # One weight per feature column instead of a hard-coded length of 2.
    w = np.zeros(X.shape[1])

    for _ in range(max_iter):
        prev_w = w

        # Per-sample linear score <w, x_i>.
        scores = (w * X).sum(axis=1)
        # 1 - sigmoid(y_i * score_i): how far each sample is from being
        # confidently classified on the correct side.
        residual = 1 - (1 / (1 + np.exp(-y * scores)))
        # Mean over samples of y_i * x_i * residual_i, one entry per feature.
        step = ((y * X.T) * residual).T.mean(axis=0)

        # A new array is bound to `w` here, so `prev_w` keeps the old values.
        w = w + k * step - k * C * w

        if euclidean(prev_w, w) < 1e-5:
            break

    return np.round(w, 3)


def sigmoid(X, w):
    """Return the logistic function of each row's linear score.

    A row's score is the sum of its entries multiplied elementwise by ``w``
    (no intercept is added). The result holds ``1 / (1 + exp(-score))`` for
    every row of ``X``.
    """
    linear_scores = (w * X).sum(axis=1)
    denominator = 1 + np.exp(-linear_scores)
    return 1 / denominator


In [5]:
# Column 0 is used as the label vector, columns 1 and 2 as the feature matrix.
y = df[0].values
X = df[[1, 2]].values


In [6]:
# Fit without the penalty term (C=0 zeroes out the `k * C * w` shrinkage).
w = gradient_descent(X, y, C=0)
print("Веса без регуляризации:", "\n", w)

# Fit with the function's default L2 coefficient.
w_l2 = gradient_descent(X, y)
print("Веса с регуляризацией L2:", "\n", w_l2)


Веса без регуляризации: 
 [0.288 0.092]
Веса с регуляризацией L2: 
 [0.029 0.025]


In [7]:
# ROC AUC of each weight vector, scored on the same X / y the weights were
# fitted on, using the sigmoid outputs as the ranking scores. Both values are
# rounded to 3 decimals.
roc_auc = np.round(roc_auc_score(y, sigmoid(X, w)), 3)
l2_roc_auc = np.round(roc_auc_score(y, sigmoid(X, w_l2)), 3)
# Printed order: unregularized first, then L2-regularized.
print(roc_auc, l2_roc_auc)


0.927 0.936
